This is the recording of R scripts.
setwd("~/Google Drive/My Drive/0_Iden2/KI-UT_doctoral_course/Data")
exp <- read.table("ExpressionTable.txt", header=TRUE, sep="\t", row.names = 1)
str(exp)
## 'data.frame': 29189 obs. of 138 variables:
## $ NG1f0 : int 319 11 1217 34 36 3 61 3 15 69 ...
## $ NG1h0 : int 591 17 1976 65 58 2 140 11 44 137 ...
## $ NG2f0 : int 418 19 866 45 58 12 65 11 62 139 ...
## $ NG2h0 : int 424 9 751 51 68 7 55 8 42 128 ...
## $ NG3f0 : int 542 3 2854 99 44 16 100 7 53 98 ...
## $ NG3h0 : int 1280 32 5270 152 126 21 205 13 77 370 ...
## $ NG4f0 : int 326 18 684 46 55 10 39 13 40 25 ...
## $ NG4h0 : int 349 23 822 21 46 19 39 15 29 20 ...
## $ NK13f0 : int 266 4 1111 21 31 2 57 3 11 72 ...
## $ NK13h0 : int 503 14 1849 72 55 5 122 7 33 112 ...
## $ NK14f0 : int 174 1 1409 15 22 4 8 2 17 31 ...
## $ NK14h0 : int 3183 79 18564 663 428 27 223 75 274 862 ...
## $ NK15f0 : int 84 9 548 32 25 2 50 3 15 55 ...
## $ NK15h0 : int 107 8 508 20 26 5 32 2 13 63 ...
## $ NK22f0 : int 70 8 631 25 27 1 42 2 10 48 ...
## $ NK22h0 : int 117 6 628 26 32 2 47 0 23 44 ...
## $ NK25f0 : int 1982 33 11739 425 315 14 631 92 221 644 ...
## $ NK25h0 : int 1697 88 8625 366 329 11 492 64 183 434 ...
## $ NK26f0 : int 156 4 741 37 26 3 58 2 17 64 ...
## $ NK26h0 : int 104 6 472 16 17 2 40 2 6 47 ...
## $ NK27f0 : int 86 4 586 16 22 2 30 2 10 35 ...
## $ NK27h0 : int 101 1 526 16 29 1 34 3 17 39 ...
## $ NK29f0 : int 1548 53 13590 237 246 23 490 28 163 303 ...
## $ NK29h0 : int 1194 31 5922 147 136 13 280 27 80 264 ...
## $ NK2f0 : int 899 13 4236 189 139 24 229 28 143 308 ...
## $ NK2h0 : int 657 33 2863 137 118 16 124 15 98 199 ...
## $ NK35f0 : int 114 8 659 23 19 2 57 5 13 56 ...
## $ NK35h0 : int 145 17 726 20 35 2 57 6 20 56 ...
## $ NK37f0 : int 151 9 776 14 14 5 27 4 6 24 ...
## $ NK37h0 : int 994 49 3289 95 262 70 190 35 126 232 ...
## $ NK38f0 : int 131 7 654 36 28 1 54 7 18 66 ...
## $ NK38h0 : int 64 5 275 13 15 2 27 2 6 20 ...
## $ NK39f0 : int 1042 16 6362 250 156 9 332 27 145 320 ...
## $ NK39h0 : int 2180 75 9627 357 385 92 605 64 295 435 ...
## $ NK3f0 : int 1 0 3 0 2 0 0 0 0 3 ...
## $ NK3h0 : int 8 0 6 0 2 0 1 0 3 0 ...
## $ NK40f0 : int 925 20 5698 168 172 11 252 43 76 260 ...
## $ NK40h0 : int 229 9 1477 40 31 2 87 6 27 57 ...
## $ NK44f0 : int 68 2 444 13 22 1 31 1 15 27 ...
## $ NK44h0 : int 44 2 260 11 9 1 8 3 11 16 ...
## $ NK7f0 : int 84 3 599 22 28 2 39 0 8 43 ...
## $ NK7h0 : int 61 0 330 16 14 1 19 4 10 23 ...
## $ NK8f0 : int 209 11 682 27 32 3 56 2 13 47 ...
## $ NK8h0 : int 410 15 1296 49 62 12 95 9 33 140 ...
## $ NK9f0 : int 1846 42 10980 374 290 14 635 59 175 616 ...
## $ NK9h0 : int 2787 106 13502 574 452 21 771 82 300 756 ...
## $ NO101f0: int 3495 119 21544 475 466 47 952 83 268 682 ...
## $ NO101f2: int 134 8 521 22 24 6 26 4 10 27 ...
## $ NO101h0: int 3886 122 19839 580 625 44 1019 106 304 816 ...
## $ NO101h2: int 75 6 252 13 10 3 11 0 6 16 ...
## $ NO108f0: int 1411 51 11755 224 180 15 424 29 60 214 ...
## $ NO108f2: int 166 5 593 21 21 2 41 4 14 26 ...
## $ NO108h0: int 1947 70 11126 226 170 32 459 66 114 303 ...
## $ NO108h2: int 235 6 610 36 29 2 30 5 19 84 ...
## $ NO117f0: int 103 3 971 16 13 2 22 3 3 14 ...
## $ NO117f2: int 127 4 564 17 10 3 32 3 10 21 ...
## $ NO117h0: int 140 1 901 19 11 3 38 4 7 25 ...
## $ NO117h2: int 244 3 742 25 50 1 34 12 24 73 ...
## $ NO121f0: int 2146 71 11364 319 335 40 591 73 160 459 ...
## $ NO121f2: int 1407 39 8913 304 176 14 156 36 201 367 ...
## $ NO121h0: int 3146 112 16724 487 555 42 820 93 232 667 ...
## $ NO121h2: int 221 5 1052 32 27 4 46 6 13 36 ...
## $ NO125f0: int 875 6 4823 273 186 12 24 14 110 253 ...
## $ NO125f2: int 133 4 469 14 22 3 41 2 8 36 ...
## $ NO125h0: int 339 9 1946 89 62 4 12 2 33 55 ...
## $ NO125h2: int 116 7 456 27 27 2 27 5 19 46 ...
## $ NO131f0: int 1458 46 11759 205 208 27 406 48 60 238 ...
## $ NO131f2: int 152 10 996 24 44 4 55 9 16 41 ...
## $ NO131h0: int 2138 84 11892 232 194 39 440 46 116 306 ...
## $ NO131h2: int 88 11 593 17 24 2 70 5 10 44 ...
## $ NO135f0: int 1775 53 8948 250 288 22 456 67 127 383 ...
## $ NO135f2: int 212 5 687 32 44 4 66 6 15 69 ...
## $ NO135h0: int 2202 60 10663 335 359 27 541 69 166 486 ...
## $ NO135h2: int 286 21 951 37 49 6 84 9 20 97 ...
## $ NO146f0: int 97 6 679 19 20 3 52 3 6 45 ...
## $ NO146f2: int 362 8 1904 46 50 5 106 7 35 75 ...
## $ NO146h0: int 129 12 597 31 40 5 60 8 19 62 ...
## $ NO146h2: int 860 16 2701 37 108 79 173 16 62 103 ...
## $ NO150f0: int 1437 42 12393 219 181 29 430 32 76 204 ...
## $ NO150f2: int 98 0 283 2 11 1 12 2 6 18 ...
## $ NO150h0: int 1854 86 11884 199 199 45 412 61 112 283 ...
## $ NO150h2: int 118 3 343 14 23 3 27 3 9 29 ...
## $ NO22f0 : int 220 5 247 13 37 8 26 1 25 37 ...
## $ NO22f2 : int 116 2 412 13 23 2 36 2 13 21 ...
## $ NO22h0 : int 141 16 144 27 23 1 15 0 12 32 ...
## $ NO22h2 : int 131 5 543 27 47 1 43 8 11 36 ...
## $ NO27f0 : int 318 11 420 75 50 1 24 12 69 67 ...
## $ NO27f2 : int 261 6 1066 19 44 4 55 3 15 85 ...
## $ NO27h0 : int 301 35 357 56 60 3 24 6 71 69 ...
## $ NO27h2 : int 261 13 1121 26 30 3 59 4 24 75 ...
## $ NO32f0 : int 371 32 2570 133 55 23 94 11 101 109 ...
## $ NO32f2 : int 226 15 1389 31 61 5 93 7 21 84 ...
## $ NO32h0 : int 440 66 3294 125 72 18 147 29 77 90 ...
## $ NO32h2 : int 223 19 1342 46 41 8 120 8 23 92 ...
## $ NO37f0 : int 196 3 1028 0 32 8 70 1 35 32 ...
## $ NO37f2 : int 7848 264 37123 1648 919 32 1764 158 568 1531 ...
## $ NO37h0 : int 163 4 1153 0 23 0 42 4 33 10 ...
## $ NO37h2 : int 5563 237 29208 988 508 32 1263 66 359 951 ...
## $ NO40f0 : int 279 4 1593 32 38 12 46 0 36 48 ...
## [list output truncated]
exp_norm <- apply(exp, 2, function(x) x/sum(x)*1000000)
head(exp_norm)
## NG1f0 NG1h0 NG2f0 NG2h0 NG3f0
## chr1:564464..564559,+ 85.0370058 96.1221464 16.4873620 21.4247380 113.1779663
## chr1:564927..564932,+ 2.9323105 2.7649348 0.7494255 0.4547704 0.6264463
## chr1:565020..565083,+ 324.4201756 321.3830139 34.1580275 37.9480619 595.9592543
## chr1:565114..565117,+ 9.0635053 10.5718097 1.7749552 2.5770322 20.6727282
## chr1:565152..565156,+ 9.5966527 9.4333071 2.2877201 3.4360429 9.1878792
## chr1:565315..565316,+ 0.7997211 0.3252865 0.4733214 0.3537103 3.3410470
## NG3h0 NG4f0 NG4h0 NK13f0 NK13h0
## chr1:564464..564559,+ 160.812788 19.3453016 17.7881609 76.8106153 91.8602337
## chr1:564927..564932,+ 4.020320 1.0681455 1.1722857 1.1550468 2.5567461
## chr1:565020..565083,+ 662.096401 40.5895285 41.8964708 320.8142618 337.6731054
## chr1:565114..565117,+ 19.096519 2.7297051 1.0703478 6.0639959 13.1489798
## chr1:565152..565156,+ 15.830009 3.2637779 2.3445714 8.9516131 10.0443595
## chr1:565315..565316,+ 2.638335 0.5934142 0.9684099 0.5775234 0.9131236
## NK14f0 NK14h0 NK15f0 NK15h0
## chr1:564464..564559,+ 152.5480789 133.167357 16.1100732 18.0544827
## chr1:564927..564932,+ 0.8767131 3.305128 1.7260793 1.3498679
## chr1:565020..565083,+ 1235.2887542 776.663155 105.0990491 85.7166094
## chr1:565114..565117,+ 13.1506965 27.737970 6.1371708 3.3746697
## chr1:565152..565156,+ 19.2876881 17.906261 4.7946647 4.3870706
## chr1:565315..565316,+ 3.5068524 1.129601 0.3835732 0.8436674
## NK22f0 NK22h0 NK25f0 NK25h0
## chr1:564464..564559,+ 23.5188342 25.9203037 113.1373466 109.8879894
## chr1:564927..564932,+ 2.6878668 1.3292463 1.8837197 5.6983754
## chr1:565020..565083,+ 212.0054913 139.1277840 670.0904699 558.5055441
## chr1:565114..565117,+ 8.3995836 5.7600675 24.2600264 23.7000613
## chr1:565152..565156,+ 9.0715503 7.0893138 17.9809607 21.3041535
## chr1:565315..565316,+ 0.3359833 0.4430821 0.7991538 0.7122969
## NK26f0 NK26h0 NK27f0 NK27h0
## chr1:564464..564559,+ 24.1738734 22.5222250 30.6120742 26.2861952
## chr1:564927..564932,+ 0.6198429 1.2993591 1.4238174 0.2602594
## chr1:565020..565083,+ 114.8258985 102.2162519 208.5892497 136.8964228
## chr1:565114..565117,+ 5.7335469 3.4649577 5.6952696 4.1641497
## chr1:565152..565156,+ 4.0289789 3.6815175 7.8309957 7.5475214
## chr1:565315..565316,+ 0.4648822 0.4331197 0.7119087 0.2602594
## NK29f0 NK29h0 NK2f0 NK2h0 NK35f0
## chr1:564464..564559,+ 68.472270 83.3017405 166.444617 146.265846 18.7411873
## chr1:564927..564932,+ 2.344335 2.1627755 2.406874 7.346686 1.3151710
## chr1:565020..565083,+ 601.122839 413.1598890 784.270743 637.380697 108.3372145
## chr1:565114..565117,+ 10.483158 10.2557419 34.992250 30.499880 3.7811167
## chr1:565152..565156,+ 10.881252 9.4883055 25.735041 26.269969 3.1235312
## chr1:565315..565316,+ 1.017353 0.9069704 4.443460 3.562030 0.3287928
## NK35h0 NK37f0 NK37h0 NK38f0 NK38h0
## chr1:564464..564559,+ 25.4211005 54.937539 177.875763 19.9262910 20.6741718
## chr1:564927..564932,+ 2.9804049 3.274423 8.768524 1.0647636 1.6151697
## chr1:565020..565083,+ 127.2808206 282.328013 588.564772 99.4793458 88.8343320
## chr1:565114..565117,+ 3.5063587 5.093547 17.000199 5.4759273 4.1994412
## chr1:565152..565156,+ 6.1361277 5.093547 46.884758 4.2590546 4.8455090
## chr1:565315..565316,+ 0.3506359 1.819124 12.526462 0.1521091 0.6460679
## NK39f0 NK39h0 NK3f0 NK3h0 NK40f0
## chr1:564464..564559,+ 113.8146769 124.594489 4.14776 29.145915 67.231020
## chr1:564927..564932,+ 1.7476342 4.286508 0.00000 0.000000 1.453644
## chr1:565020..565083,+ 694.9030467 550.216123 12.44328 21.859437 414.143081
## chr1:565114..565117,+ 27.3067843 20.403776 0.00000 0.000000 12.210607
## chr1:565152..565156,+ 17.0394334 22.004073 8.29552 7.286479 12.501336
## chr1:565315..565316,+ 0.9830442 5.258116 0.00000 0.000000 0.799504
## NK40h0 NK44f0 NK44h0 NK7f0
## chr1:564464..564559,+ 37.5137339 30.9654621 23.0919126 29.9408988
## chr1:564927..564932,+ 1.4743389 0.9107489 1.0496324 1.0693178
## chr1:565020..565083,+ 241.9553927 202.1862527 136.4522110 213.5071236
## chr1:565114..565117,+ 6.5526173 5.9198678 5.7729782 7.8416640
## chr1:565152..565156,+ 5.0782784 10.0182377 4.7233458 9.9802996
## chr1:565315..565316,+ 0.3276309 0.4553744 0.5248162 0.7128785
## NK7h0 NK8f0 NK8h0 NK9f0 NK9h0
## chr1:564464..564559,+ 24.6677337 52.2848740 98.023277 113.541399 112.5067410
## chr1:564927..564932,+ 0.0000000 2.7518355 3.586217 2.583282 4.2790508
## chr1:565020..565083,+ 133.4483955 170.6137994 309.849188 675.343747 545.0541863
## chr1:565114..565117,+ 6.4702252 6.7545053 11.714977 23.003512 23.1714637
## chr1:565152..565156,+ 5.6614471 8.0053396 14.823032 17.836948 18.2465185
## chr1:565315..565316,+ 0.4043891 0.7505006 2.868974 0.861094 0.8477365
## NO101f0 NO101f2 NO101h0 NO101h2 NO108f0
## chr1:564464..564559,+ 92.990308 41.196937 90.293699 29.080645 85.3199447
## chr1:564927..564932,+ 3.166194 2.459519 2.834748 2.326452 3.0838534
## chr1:565020..565083,+ 573.214077 160.176151 460.971873 97.710966 710.7979797
## chr1:565114..565117,+ 12.638168 6.763676 13.476672 5.040645 13.5447680
## chr1:565152..565156,+ 12.398708 7.378556 14.522275 3.877419 10.8841885
## chr1:565315..565316,+ 1.250513 1.844639 1.022368 1.163226 0.9070157
## NO108f2 NO108h0 NO108h2 NO117f0 NO117f2
## chr1:564464..564559,+ 51.8636062 106.696893 37.6186769 26.9517587 58.444251
## chr1:564927..564932,+ 1.5621568 3.836046 0.9604769 0.7850027 1.840764
## chr1:565020..565083,+ 185.2717981 609.712190 97.6484805 254.0792010 259.547697
## chr1:565114..565117,+ 6.5610586 12.384950 5.7628611 4.1866810 7.823246
## chr1:565152..565156,+ 6.5610586 9.316113 4.6423048 3.4016783 4.601910
## chr1:565315..565316,+ 0.6248627 1.753621 0.3201590 0.5233351 1.380573
## NO117h0 NO117h2 NO121f0 NO121f2 NO121h0
## chr1:564464..564559,+ 40.7711454 46.5489487 94.812057 120.399758 93.824868
## chr1:564927..564932,+ 0.2912225 0.5723231 3.136839 3.337307 3.340237
## chr1:565020..565083,+ 262.3914432 141.5545899 502.070932 762.702945 498.768944
## chr1:565114..565117,+ 5.5332269 4.7693595 14.093684 26.013878 14.524066
## chr1:565152..565156,+ 3.2034471 9.5387190 14.800577 15.060666 16.552067
## chr1:565315..565316,+ 0.8736674 0.1907744 1.767233 1.198008 1.252589
## NO121h2 NO125f0 NO125f2 NO125h0 NO125h2
## chr1:564464..564559,+ 85.072999 54.8659515 51.280727 33.3425525 41.1220069
## chr1:564927..564932,+ 1.924728 0.3762237 1.542277 0.8852005 2.4815004
## chr1:565020..565083,+ 404.962874 302.4211247 180.832036 191.4000213 161.6520270
## chr1:565114..565117,+ 12.318262 17.1181769 5.397971 8.7536495 9.5715016
## chr1:565152..565156,+ 10.393534 11.6629337 8.482526 6.0980480 9.5715016
## chr1:565315..565316,+ 1.539783 0.7524473 1.156708 0.3934224 0.7090001
## NO131f0 NO131f2 NO131h0 NO131h2 NO135f0
## chr1:564464..564559,+ 91.204102 53.646668 110.413946 23.2833984 84.481794
## chr1:564927..564932,+ 2.877496 3.529386 4.338060 2.9104248 2.522555
## chr1:565020..565083,+ 735.575474 351.526848 614.145297 156.8983550 425.883432
## chr1:565114..565117,+ 12.823622 8.470526 11.981308 4.4979292 11.898844
## chr1:565152..565156,+ 13.011285 15.529298 10.018852 6.3500177 13.707469
## chr1:565315..565316,+ 1.688965 1.411754 2.014099 0.5291681 1.047098
## NO135f2 NO135h0 NO135h2 NO146f0 NO146f2
## chr1:564464..564559,+ 64.568279 88.872540 70.715482 18.4570198 32.8677851
## chr1:564927..564932,+ 1.522837 2.421595 5.192396 1.1416713 0.7263599
## chr1:565020..565083,+ 209.237771 430.357810 235.141342 129.1991385 172.8736541
## chr1:565114..565117,+ 9.746155 13.520573 9.148506 3.6152925 4.1765694
## chr1:565152..565156,+ 13.400964 14.489211 12.115590 3.8055711 4.5397493
## chr1:565315..565316,+ 1.218269 1.089718 1.483542 0.5708357 0.4539749
## NO146h0 NO146h2 NO150f0 NO150f2 NO150h0
## chr1:564464..564559,+ 19.8809271 85.412965 84.974116 72.4338819 92.887684
## chr1:564927..564932,+ 1.8493886 1.589078 2.483586 0.0000000 4.308706
## chr1:565020..565083,+ 92.0070813 268.256301 732.835229 209.1713120 595.403042
## chr1:565114..565117,+ 4.7775871 3.674744 12.950126 1.4782425 9.970145
## chr1:565152..565156,+ 6.1646286 10.726279 10.703072 8.1303337 9.970145
## chr1:565315..565316,+ 0.7705786 7.846075 1.714857 0.7391212 2.254555
## NO150h2 NO22f0 NO22f2 NO22h0 NO22h2
## chr1:564464..564559,+ 59.229752 13.4081925 50.2076910 12.43276278 40.3542798
## chr1:564927..564932,+ 1.505841 0.3047316 0.8656498 1.41080996 1.5402397
## chr1:565020..565083,+ 172.167839 15.0537434 178.3238682 12.69728965 167.2700299
## chr1:565114..565117,+ 7.027259 0.7923023 5.6267240 2.38074181 8.3172943
## chr1:565152..565156,+ 11.544782 2.2550142 9.9549732 2.02803932 14.4782530
## chr1:565315..565316,+ 1.505841 0.4875706 0.8656498 0.08817562 0.3080479
## NO27f0 NO27f2 NO27h0 NO27h2 NO32f0
## chr1:564464..564559,+ 27.88429980 48.5280115 27.3164385 35.452501 36.607313
## chr1:564927..564932,+ 0.96455125 1.1155865 3.1763301 1.765833 3.157504
## chr1:565020..565083,+ 36.82832049 198.2025299 32.3985666 152.269171 253.587048
## chr1:565114..565117,+ 6.57648580 3.5326905 5.0821281 3.531667 13.123376
## chr1:565152..565156,+ 4.38432387 8.1809675 5.4451372 4.075000 5.426960
## chr1:565315..565316,+ 0.08768648 0.7437243 0.2722569 0.407500 2.269456
## NO32f2 NO32h0 NO32h2 NO37f0 NO37f2
## chr1:564464..564559,+ 55.931706 34.455092 27.999092 34.0869565 240.0003229
## chr1:564927..564932,+ 3.712281 5.168264 2.385573 0.5217391 8.0734054
## chr1:565020..565083,+ 343.757254 257.943347 168.496778 178.7826087 1135.2614664
## chr1:565114..565117,+ 7.672048 9.788378 5.775597 0.0000000 50.3976213
## chr1:565152..565156,+ 15.096611 5.638106 5.147815 5.5652174 28.1040134
## chr1:565315..565316,+ 1.237427 1.409526 1.004452 1.3913043 0.9785946
## NO37h0 NO37h2 NO40f0 NO40f2 NO40h0
## chr1:564464..564559,+ 45.183470 193.115460 60.2516315 34.0782968 82.623436
## chr1:564927..564932,+ 1.108797 8.227281 0.8638227 1.3364038 4.551875
## chr1:565020..565083,+ 319.610679 1013.934272 344.0173801 133.8631135 407.737692
## chr1:565114..565117,+ 0.000000 34.297694 6.9105814 5.3456152 9.379622
## chr1:565152..565156,+ 6.375582 17.634847 8.2063154 11.1366983 9.793429
## chr1:565315..565316,+ 0.000000 1.110857 2.5914680 0.4454679 2.482841
## NO40h2 NO47f0 NO47f2 NO47h0 NO47h2
## chr1:564464..564559,+ 31.8895500 23.255418 73.6585095 13.7639659 63.4373990
## chr1:564927..564932,+ 1.4981668 3.605491 0.6037583 1.4488385 1.0133770
## chr1:565020..565083,+ 80.2589346 28.843929 313.9543027 13.4346844 200.6486423
## chr1:565114..565117,+ 4.4945003 3.515354 8.7544950 3.0952459 7.2963143
## chr1:565152..565156,+ 7.2768101 6.039198 10.8676489 2.9635333 10.9444714
## chr1:565315..565316,+ 0.4280477 1.442196 2.7169122 0.7902756 0.6080262
## NO48f0 NO48f2 NO48h0 NO48h2 NO50f0
## chr1:564464..564559,+ 10.2808841 30.874208 11.5356128 23.5728598 85.848666
## chr1:564927..564932,+ 0.6209930 4.749878 0.6819081 2.1608455 2.528875
## chr1:565020..565083,+ 58.4423412 154.031764 52.2796246 140.2585157 635.147029
## chr1:565114..565117,+ 1.9319782 7.124817 2.3866785 4.1252505 15.838746
## chr1:565152..565156,+ 1.0349883 5.089155 2.0457244 4.7145720 15.705647
## chr1:565315..565316,+ 0.4139953 1.017831 0.7387338 0.5893215 5.190850
## NO50f2 NO50h0 NO50h2 NO51f0 NO51f2
## chr1:564464..564559,+ 58.7403985 120.765296 47.4187427 118.163382 96.892922
## chr1:564927..564932,+ 1.9196209 4.378625 1.9411181 3.132041 1.524539
## chr1:565020..565083,+ 250.3185611 535.887173 150.5753058 699.394349 325.234982
## chr1:565114..565117,+ 9.5981043 16.102039 6.1006570 14.900925 8.469661
## chr1:565152..565156,+ 9.5981043 19.774434 11.3694061 9.680855 11.688132
## chr1:565315..565316,+ 0.3839242 2.471804 0.5546052 3.416772 1.863325
## NO51h0 NO51h2 NO56f0 NO56f2 NO56h0
## chr1:564464..564559,+ 106.727657 72.094730 37.077257 94.2028304 29.7141724
## chr1:564927..564932,+ 5.681320 3.425668 1.678066 0.6408356 2.1224409
## chr1:565020..565083,+ 755.209771 341.788192 131.688190 318.2816720 130.6059159
## chr1:565114..565117,+ 15.319274 10.588427 10.388025 9.3989219 6.5947270
## chr1:565152..565156,+ 13.391683 14.169807 4.394934 9.8261456 4.3206832
## chr1:565315..565316,+ 3.449373 0.934273 4.315026 1.0680593 0.6822131
## NO56h2 NO68f0 NO68f2 NO68h0 NO68h2
## chr1:564464..564559,+ 72.9479322 179.921438 48.2781357 200.341903 48.298057
## chr1:564927..564932,+ 4.4264522 2.345345 0.2235099 4.579243 1.145408
## chr1:565020..565083,+ 327.0262882 722.254461 210.5463140 1159.693415 197.582959
## chr1:565114..565117,+ 10.9776014 43.668083 6.2582769 34.760621 11.454085
## chr1:565152..565156,+ 11.6858338 27.362354 6.9288065 16.443647 9.545070
## chr1:565315..565316,+ 0.8852904 2.568711 0.8940396 1.248885 0.954507
## NO73f0 NO73f2 NO73h0 NO73h2 NO77f0
## chr1:564464..564559,+ 88.353003 84.0010977 196.691114 70.0514045 96.454040
## chr1:564927..564932,+ 1.827993 0.7601909 4.972528 0.9127219 2.810168
## chr1:565020..565083,+ 502.088787 225.3966106 843.672278 180.4907523 566.632000
## chr1:565114..565117,+ 19.498594 3.0407637 42.818993 5.9326922 14.192766
## chr1:565152..565156,+ 15.537942 6.0815274 45.581508 17.3417158 12.830261
## chr1:565315..565316,+ 4.874648 1.1402864 7.182541 1.5972633 1.532819
## NO77f2 NO77h0 NO77h2 NO89f0 NO89f2
## chr1:564464..564559,+ 101.484273 92.191732 69.964272 88.732834 170.615163
## chr1:564927..564932,+ 1.301080 2.898106 5.230974 3.348409 5.070287
## chr1:565020..565083,+ 321.180994 448.248068 325.846066 718.977783 1066.978557
## chr1:565114..565117,+ 7.434745 12.915970 10.897862 12.897575 28.520365
## chr1:565152..565156,+ 12.639067 14.034137 12.859477 12.029469 23.766971
## chr1:565315..565316,+ 1.672818 1.163807 1.307743 1.364167 1.774601
## NO89h0 NO89h2
## chr1:564464..564559,+ 104.729658 51.9005943
## chr1:564927..564932,+ 4.994407 0.8151926
## chr1:565020..565083,+ 613.241834 284.5022108
## chr1:565114..565117,+ 10.039777 7.8801950
## chr1:565152..565156,+ 10.192667 13.3148122
## chr1:565315..565316,+ 2.140460 2.4455777
write.table(exp_norm, file = "ExpressionTable_norm.tsv", sep="\t", quote = FALSE)
column is not right, so i adjusted and added “TC” to column1,row1 manually.
To make PCA, merge cohort “newcond3” column.
# === Step 1: データ読み込み ===
# Expression matrix: 正しく row.names に"TC"列を指定
exp_norm <- read.table("ExpressionTable_norm.tsv", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
# Cohort: Cond → newcond3 の対応表
cohort <- read.delim("Cohort.txt", header=TRUE, stringsAsFactors=FALSE)
# === Step 2: 置換用マッピングを作成 ===
label_map <- setNames(cohort$newcond3, cohort$Cond)
# === Step 3: 置換処理 ===
# 元の列名を取得(例:NG1f0, NG1h0, ...)
old_colnames <- colnames(exp_norm)
# 対応するものがあれば newcond3 に置換、なければそのまま
new_colnames <- ifelse(old_colnames %in% names(label_map), label_map[old_colnames], old_colnames)
# 列名を更新
colnames(exp_norm) <- new_colnames
# === Step 4: 正しく出力 ===
write.table(exp_norm, file="ExpressionTable_norm_renamed.tsv", sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)
Do PCA analysis.
# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)
# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)
# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
ifelse(grepl("h", pca_df$Sample), "h", "other"))
# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
ifelse(pca_df$Type == "h", 0.8, 1.0))
# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
"OB.f" = "orange", "OB.h" = "orange",
"POB.f" = "forestgreen", "POB.h" = "forestgreen")
# === f/hの重心を計算 ===
centroids <- pca_df %>%
filter(Type %in% c("f", "h")) %>%
group_by(Group, Type) %>%
summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
mutate(ID = paste(Group, Type, sep = "."),
Alpha = ifelse(Type == "f", 0.3, 0.8))
# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
select(Group, Type, PC1, PC2) %>%
pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
mutate(x_start = PC1_f, y_start = PC2_f,
x_end = PC1_h, y_end = PC2_h)
# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
geom_point(aes(color = Group, alpha = Alpha), size = 3) +
scale_color_manual(values = color_map) +
scale_alpha_identity() +
# 四角い重心ポイント(f: 透明度低く, h: 高く)
geom_point(data = centroids,
aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
scale_fill_manual(values = centroid_fill_map) +
# 矢印追加
geom_segment(data = arrow_data,
aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
arrow = arrow(length = unit(0.25, "cm")),
color = "black", size = 1) +
# 軸・タイトルなど
theme_minimal(base_size = 14) +
labs(title = "PCA with Group Centroids and Arrows",
x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
theme(legend.position = "right")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# === 描画 ===
print(p)
Prepare for the edgeR differential expression analysis. Replace with sample name.
# === Step 1: データ読み込み ===
# Expression matrix: 正しく row.names に"TC"列を指定
exp <- read.table("ExpressionTable.txt", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
# Cohort: Cond → newcond3 の対応表
cohort <- read.delim("Cohort.txt", header=TRUE, stringsAsFactors=FALSE)
# === Step 2: 置換用マッピングを作成 ===
label_map <- setNames(cohort$newcond3, cohort$Cond)
# === Step 3: 置換処理 ===
# 元の列名を取得(例:NG1f0, NG1h0, ...)
old_colnames <- colnames(exp)
# 対応するものがあれば newcond3 に置換、なければそのまま
new_colnames <- ifelse(old_colnames %in% names(label_map), label_map[old_colnames], old_colnames)
# 列名を更新
colnames(exp) <- new_colnames
# === Step 4: 正しく出力 ===
write.table(exp, file="ExpressionTable_renamed.tsv", sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)
edgeR
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.4.2
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.4.2
library(dplyr)
counts <- read.table("ExpressionTable.txt", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
cohort <- read.table("Cohort.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
run_edgeR_comparison <- function(group_label, cohort, counts) {
# サンプル選定
samples <- cohort %>% filter(newcond3 %in% c(paste0(group_label, ".f"), paste0(group_label, ".h")))
group <- factor(samples$newcond3, levels = c(paste0(group_label, ".f"), paste0(group_label, ".h")))
colnames(counts) <- make.names(colnames(counts)) # Rの安全な名前に変換
selected_counts <- counts[, make.names(samples$Cond)]
# DGEListを作成
dge <- DGEList(counts = selected_counts, group = group)
keep <- filterByExpr(dge)
dge <- dge[keep, , keep.lib.sizes = FALSE]
dge <- calcNormFactors(dge)
# モデル作成と検定
design <- model.matrix(~group)
dge <- estimateDisp(dge, design)
fit <- glmQLFit(dge, design)
qlf <- glmQLFTest(fit, coef = 2)
# 結果出力
result <- topTags(qlf, n = Inf)$table
result$FDR <- p.adjust(result$PValue, method = "BH")
result <- result[order(result$FDR), ]
return(result)
}
# NO (Normal) 比較
res_NO <- run_edgeR_comparison("NO", cohort, counts)
# OB (Obese) 比較
res_OB <- run_edgeR_comparison("OB", cohort, counts)
# POB (Pre-Obese) 比較
res_POB <- run_edgeR_comparison("POB", cohort, counts)
# 保存
write.csv(res_NO, "edgeR_NO_h_vs_f.csv")
write.csv(res_OB, "edgeR_OB_h_vs_f.csv")
write.csv(res_POB, "edgeR_POB_h_vs_f.csv")
volcano plot
library(ggplot2)
plot_volcano <- function(res, title = "Volcano Plot", logFC_threshold = 1, FDR_threshold = 0.05) {
res$gene <- rownames(res)
res$Significant <- with(res, ifelse(FDR < FDR_threshold & abs(logFC) >= logFC_threshold,
ifelse(logFC > 0, "Up", "Down"),
"Not Significant"))
ggplot(res, aes(x = logFC, y = -log10(FDR), color = Significant)) +
geom_point(alpha = 0.7) +
scale_color_manual(values = c("Up" = "red", "Down" = "blue", "Not Significant" = "grey")) +
theme_minimal() +
labs(title = title,
x = "log2 Fold Change",
y = "-log10 FDR") +
geom_vline(xintercept = c(-logFC_threshold, logFC_threshold), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(FDR_threshold), linetype = "dashed", color = "black")
}
# NO群のvolcano plot
plot_volcano(res_NO, title = "NO.h vs NO.f")
# OB群
plot_volcano(res_OB, title = "OB.h vs OB.f")
# POB群
plot_volcano(res_POB, title = "POB.h vs POB.f")
edgeR using design matrix (better way than comparing h vs f for each)
library(edgeR)
library(dplyr)
# データ読み込み
counts <- read.table("ExpressionTable.txt", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
cohort <- read.table("Cohort.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# 一致するサンプルのみ抽出
common_samples <- intersect(colnames(counts), cohort$Cond)
counts <- counts[, common_samples]
cohort <- cohort[cohort$Cond %in% common_samples, ]
# group(NO.f, NO.h, OB.f, OB.h, POB.f, POB.h など)を因子として定義
group <- factor(cohort$newcond3)
# デザインマトリクスの作成
design <- model.matrix(~ 0 + group) # インターセプトなしで各群を列に
colnames(design) <- levels(group) # 列名を分かりやすく
dge <- DGEList(counts = counts, group = group)
keep <- filterByExpr(dge, design)
dge <- dge[keep, , keep.lib.sizes = FALSE]
dge <- calcNormFactors(dge)
dge <- estimateDisp(dge, design)
fit <- glmQLFit(dge, design)
# 比較したいペア(h vs f)
contrast_NO <- makeContrasts(NO.h - NO.f, levels = design)
contrast_OB <- makeContrasts(OB.h - OB.f, levels = design)
contrast_POB <- makeContrasts(POB.h - POB.f, levels = design)
qlf_NO <- glmQLFTest(fit, contrast = contrast_NO)
qlf_OB <- glmQLFTest(fit, contrast = contrast_OB)
qlf_POB <- glmQLFTest(fit, contrast = contrast_POB)
# 結果取得
res_NO <- topTags(qlf_NO, n = Inf)$table
res_OB <- topTags(qlf_OB, n = Inf)$table
res_POB <- topTags(qlf_POB, n = Inf)$table
# 保存
write.csv(res_NO, "edgeR_NO_h_vs_f_2.csv")
write.csv(res_OB, "edgeR_OB_h_vs_f_2.csv")
write.csv(res_POB, "edgeR_POB_h_vs_f_2.csv")
# plot_volcano function that i defined earlier
plot_volcano(res_NO, title = "NO.h vs NO.f")
plot_volcano(res_OB, title = "OB.h vs OB.f")
plot_volcano(res_POB, title = "POB.h vs POB.f")
PCA against DEG only prep for PCA
# 正規化済みの発現データ
norm_expr <- read.table("ExpressionTable_norm_renamed.tsv", sep="\t", header=TRUE, row.names=1, check.names=FALSE)
# DEG結果(例: res_NO)
deg_res <- read.csv("edgeR_NO_h_vs_f.csv", row.names = 1)
deg_genes <- rownames(deg_res[deg_res$FDR < 0.05, ])
deg_expr <- norm_expr[rownames(norm_expr) %in% deg_genes, ]
write.table(deg_expr, "ExpressionTable_norm_renamed_DEG_NO.tsv", sep="\t", quote=FALSE, col.names=NA)
deg_res <- read.csv("edgeR_OB_h_vs_f.csv", row.names = 1)
deg_genes <- rownames(deg_res[deg_res$FDR < 0.05, ])
deg_expr <- norm_expr[rownames(norm_expr) %in% deg_genes, ]
write.table(deg_expr, "ExpressionTable_norm_renamed_DEG_OB.tsv", sep="\t", quote=FALSE, col.names=NA)
deg_res <- read.csv("edgeR_POB_h_vs_f.csv", row.names = 1)
deg_genes <- rownames(deg_res[deg_res$FDR < 0.05, ])
deg_expr <- norm_expr[rownames(norm_expr) %in% deg_genes, ]
write.table(deg_expr, "ExpressionTable_norm_renamed_DEG_POB.tsv", sep="\t", quote=FALSE, col.names=NA)
# 全てのDEG
deg_NO <- rownames(res_NO[res_NO$FDR < 0.05, ])
deg_OB <- rownames(res_OB[res_OB$FDR < 0.05, ])
deg_POB <- rownames(res_POB[res_POB$FDR < 0.05, ])
# 全てのDEGを結合してユニークに(重複を除く)
deg_all <- unique(c(deg_NO, deg_OB, deg_POB))
deg_expr <- norm_expr[rownames(norm_expr) %in% deg_all, ]
write.table(deg_expr, "ExpressionTable_norm_renamed_DEG.tsv", sep="\t", quote=FALSE, col.names=NA)
PCA using the function i defined earlier NO
# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
library(tidyr)
# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed_DEG_NO.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)
# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)
# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
ifelse(grepl("h", pca_df$Sample), "h", "other"))
# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
ifelse(pca_df$Type == "h", 0.8, 1.0))
# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
"OB.f" = "orange", "OB.h" = "orange",
"POB.f" = "forestgreen", "POB.h" = "forestgreen")
# === f/hの重心を計算 ===
centroids <- pca_df %>%
filter(Type %in% c("f", "h")) %>%
group_by(Group, Type) %>%
summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
mutate(ID = paste(Group, Type, sep = "."),
Alpha = ifelse(Type == "f", 0.3, 0.8))
# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
select(Group, Type, PC1, PC2) %>%
pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
mutate(x_start = PC1_f, y_start = PC2_f,
x_end = PC1_h, y_end = PC2_h)
# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
geom_point(aes(color = Group, alpha = Alpha), size = 3) +
scale_color_manual(values = color_map) +
scale_alpha_identity() +
# 四角い重心ポイント(f: 透明度低く, h: 高く)
geom_point(data = centroids,
aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
scale_fill_manual(values = centroid_fill_map) +
# 矢印追加
geom_segment(data = arrow_data,
aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
arrow = arrow(length = unit(0.25, "cm")),
color = "black", size = 1) +
# 軸・タイトルなど
theme_minimal(base_size = 14) +
labs(title = "PCA with Group Centroids and Arrows",
x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
theme(legend.position = "right")
# === 描画 ===
print(p)
OB
# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
library(tidyr)
# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed_DEG_OB.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)
# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)
# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
ifelse(grepl("h", pca_df$Sample), "h", "other"))
# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
ifelse(pca_df$Type == "h", 0.8, 1.0))
# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
"OB.f" = "orange", "OB.h" = "orange",
"POB.f" = "forestgreen", "POB.h" = "forestgreen")
# === f/hの重心を計算 ===
centroids <- pca_df %>%
filter(Type %in% c("f", "h")) %>%
group_by(Group, Type) %>%
summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
mutate(ID = paste(Group, Type, sep = "."),
Alpha = ifelse(Type == "f", 0.3, 0.8))
# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
select(Group, Type, PC1, PC2) %>%
pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
mutate(x_start = PC1_f, y_start = PC2_f,
x_end = PC1_h, y_end = PC2_h)
# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
geom_point(aes(color = Group, alpha = Alpha), size = 3) +
scale_color_manual(values = color_map) +
scale_alpha_identity() +
# 四角い重心ポイント(f: 透明度低く, h: 高く)
geom_point(data = centroids,
aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
scale_fill_manual(values = centroid_fill_map) +
# 矢印追加
geom_segment(data = arrow_data,
aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
arrow = arrow(length = unit(0.25, "cm")),
color = "black", size = 1) +
# 軸・タイトルなど
theme_minimal(base_size = 14) +
labs(title = "PCA with Group Centroids and Arrows",
x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
theme(legend.position = "right")
# === 描画 ===
print(p)
POB
# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
library(tidyr)
# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed_DEG_POB.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)
# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)
# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
ifelse(grepl("h", pca_df$Sample), "h", "other"))
# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
ifelse(pca_df$Type == "h", 0.8, 1.0))
# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
"OB.f" = "orange", "OB.h" = "orange",
"POB.f" = "forestgreen", "POB.h" = "forestgreen")
# === f/hの重心を計算 ===
centroids <- pca_df %>%
filter(Type %in% c("f", "h")) %>%
group_by(Group, Type) %>%
summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
mutate(ID = paste(Group, Type, sep = "."),
Alpha = ifelse(Type == "f", 0.3, 0.8))
# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
select(Group, Type, PC1, PC2) %>%
pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
mutate(x_start = PC1_f, y_start = PC2_f,
x_end = PC1_h, y_end = PC2_h)
# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
geom_point(aes(color = Group, alpha = Alpha), size = 3) +
scale_color_manual(values = color_map) +
scale_alpha_identity() +
# 四角い重心ポイント(f: 透明度低く, h: 高く)
geom_point(data = centroids,
aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
scale_fill_manual(values = centroid_fill_map) +
# 矢印追加
geom_segment(data = arrow_data,
aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
arrow = arrow(length = unit(0.25, "cm")),
color = "black", size = 1) +
# 軸・タイトルなど
theme_minimal(base_size = 14) +
labs(title = "PCA with Group Centroids and Arrows",
x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
theme(legend.position = "right")
# === 描画 ===
print(p)
PCA using DEGs in at least one comparison
# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
library(tidyr)
# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed_DEG.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)
# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)
# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
ifelse(grepl("h", pca_df$Sample), "h", "other"))
# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
ifelse(pca_df$Type == "h", 0.8, 1.0))
# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
"OB.f" = "orange", "OB.h" = "orange",
"POB.f" = "forestgreen", "POB.h" = "forestgreen")
# === f/hの重心を計算 ===
centroids <- pca_df %>%
filter(Type %in% c("f", "h")) %>%
group_by(Group, Type) %>%
summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
mutate(ID = paste(Group, Type, sep = "."),
Alpha = ifelse(Type == "f", 0.3, 0.8))
# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
select(Group, Type, PC1, PC2) %>%
pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
mutate(x_start = PC1_f, y_start = PC2_f,
x_end = PC1_h, y_end = PC2_h)
# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
geom_point(aes(color = Group, alpha = Alpha), size = 3) +
scale_color_manual(values = color_map) +
scale_alpha_identity() +
# 四角い重心ポイント(f: 透明度低く, h: 高く)
geom_point(data = centroids,
aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
scale_fill_manual(values = centroid_fill_map) +
# 矢印追加
geom_segment(data = arrow_data,
aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
arrow = arrow(length = unit(0.25, "cm")),
color = "black", size = 1) +
# 軸・タイトルなど
theme_minimal(base_size = 14) +
labs(title = "PCA with Group Centroids and Arrows",
x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
theme(legend.position = "right")
# === 描画 ===
print(p)
PCA for Denmark or KI (check for batch effect)
# 発現データの読み込み
expr <- read.table("ExpressionTable_norm.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
# 臨床情報の読み込み
clinical <- read.table("clinical_data.txt", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
# 元の列名を取得
original_colnames <- colnames(expr)
# ベース名(末尾のf0, h0, f2, h2などを除く)を抽出
# 正規表現で末尾のf0, h0, f2, h2 を除去
base_names <- gsub("f[0-9]$|h[0-9]$", "", original_colnames)
# 新しい列名を生成
new_colnames <- mapply(function(orig, base) {
where <- clinical$Where[clinical$ID == base]
if (length(where) == 1) {
paste0(orig, "_", where)
} else {
orig # 対応が見つからない場合はそのまま
}
}, orig = original_colnames, base = base_names)
# 列名を置き換え
colnames(expr) <- new_colnames
# 保存
write.table(expr, "ExpressionTable_norm_place.tsv", sep = "\t", quote = FALSE, col.names = NA)
-> There are so many samples missing in the clinical_data.txt, so i gave up.
Merge to genes at last.(didn’t need to do this)
exp_norm <- read.table("ExpressionTable_norm.tsv", sep="\t", header=TRUE, stringsAsFactors=FALSE) # don't use row.names=1 because i will use the first column to merge.
annot <- read.table("Annotation.txt", header=TRUE, sep="\t", stringsAsFactors=FALSE)
exp_norm_ids <- exp_norm[[1]]
exp_norm$TC <- exp_norm_ids
merged <- merge(annot[, c("TC", "gene")], exp_norm, by = "TC", all.y = TRUE)
head(merged)
## TC gene NG1f0 NG1h0 NG2f0 NG2h0 NG3f0 NG3h0 NG4f0
## 1 0 <NA> 0 0.4879297 1.656625 1.212721 2.2969698 2.889605 2.3143152
## 2 0 <NA> 0 0.1626432 2.761041 2.071732 3.5498624 2.763970 1.6615596
## 3 0 <NA> 0 3.9034374 0.788869 1.313781 0.0000000 1.130715 0.0000000
## 4 0 <NA> 0 18.8666142 5.088205 11.520850 0.6264463 6.281750 1.7802425
## 5 0 <NA> 0 0.1626432 2.051059 1.111661 0.2088154 1.633255 0.2967071
## 6 0 <NA> 0 0.3252865 1.735512 1.010601 1.4617081 0.502540 1.8395839
## NG4h0 NK13f0 NK13h0 NK14f0 NK14h0 NK15f0 NK15h0
## 1 2.905230 0.0000000 0.0000000 0.000000 0.5438818 0.5753598 1.1811344
## 2 1.885851 0.0000000 0.0000000 0.000000 0.1255112 0.7671463 0.0000000
## 3 1.070348 0.0000000 5.2961169 0.000000 3.5143129 0.1917866 0.8436674
## 4 7.492435 0.5775234 22.2802157 0.000000 18.1154464 0.7671463 4.3870706
## 5 2.038758 0.0000000 0.7304989 0.000000 0.2928594 0.3835732 0.5062004
## 6 1.325193 0.0000000 0.5478742 2.630139 0.5020447 0.0000000 0.6749339
## NK22f0 NK22h0 NK25f0 NK25h0 NK26f0 NK26h0 NK27f0
## 1 0.0000000 0.0000000 0.57082415 1.1655768 0.6198429 0.4331197 0.0000000
## 2 0.0000000 0.2215411 0.22832966 0.3885256 0.6198429 0.6496796 0.3559544
## 3 0.0000000 1.9938695 0.51374174 1.8131194 0.3099215 1.2993591 0.0000000
## 4 1.0079500 27.4710911 0.45665932 7.3172321 0.3099215 5.1974365 1.4238174
## 5 0.0000000 0.0000000 0.05708242 0.5180341 0.1549607 0.6496796 0.0000000
## 6 0.3359833 0.6646232 0.45665932 2.5901706 0.3099215 0.6496796 0.7119087
## NK27h0 NK29f0 NK29h0 NK2f0 NK2h0 NK35f0 NK35h0
## 1 0.2602594 3.7597823 1.6744068 0.0000000 0.6678806 1.3151710 1.2272255
## 2 0.0000000 2.9635931 1.2558051 0.0000000 0.4452537 0.6575855 0.7012717
## 3 3.3833717 1.4154475 2.8604450 1.1108651 2.2262686 0.0000000 0.1753179
## 4 31.7516418 9.2004084 12.6975852 0.0000000 8.4598206 0.6575855 3.6816766
## 5 0.0000000 2.3443348 0.7674365 0.0000000 0.4452537 0.9863783 0.0000000
## 6 0.0000000 0.7961892 0.3488348 0.3702884 1.7810149 0.4931891 0.8765897
## NK37f0 NK37h0 NK38f0 NK38h0 NK39f0 NK39h0 NK3f0 NK3h0
## 1 1.4552990 0.0000000 0.4563273 0.9691018 0.2184543 0.5143809 4.14776 3.643239
## 2 0.7276495 0.0000000 0.6084364 0.6460679 0.1092271 0.3429206 4.14776 18.216197
## 3 0.0000000 0.0000000 0.1521091 0.6460679 0.7645900 1.7146031 0.00000 0.000000
## 4 2.5467733 2.8631913 0.3042182 5.1685430 0.1092271 6.8012588 4.14776 18.216197
## 5 0.3638248 0.0000000 0.3042182 0.3230339 0.0000000 0.2857672 0.00000 3.643239
## 6 0.7276495 0.5368484 0.3042182 0.9691018 0.4369085 2.2861374 0.00000 3.643239
## NK40f0 NK40h0 NK44f0 NK44h0 NK7f0 NK7h0 NK8f0
## 1 5.7418925 5.5697247 0.0000000 0.0000000 0.0000000 0.000000 0.7505006
## 2 1.7443724 3.1124932 0.0000000 0.0000000 0.0000000 0.000000 0.5003337
## 3 1.3082793 1.1467080 0.0000000 5.2481620 0.0000000 3.235113 0.0000000
## 4 2.8346052 5.8973555 0.9107489 27.8152584 0.7128785 23.858956 0.5003337
## 5 0.2907287 0.8190772 0.0000000 0.0000000 0.0000000 0.000000 0.0000000
## 6 1.5990080 1.1467080 1.3661233 0.5248162 0.3564393 0.000000 0.5003337
## NK8h0 NK9f0 NK9h0 NO101f0 NO101f2 NO101h0 NO101h2
## 1 0.0000000 0.79958731 1.2110521 1.623007 0.9223195 2.207386 0.000000
## 2 1.4344870 0.00000000 0.3229472 2.713880 0.9223195 2.881219 0.000000
## 3 1.1954058 0.43054702 1.9376834 1.223907 0.0000000 2.672099 5.428387
## 4 8.3678407 0.49205373 7.0644706 8.673774 1.5371991 18.007621 13.570967
## 5 0.2390812 0.06150672 0.5651577 1.223907 0.0000000 1.394138 0.000000
## 6 1.1954058 0.43054702 2.3817358 1.037660 1.5371991 1.022368 0.000000
## NO108f0 NO108f2 NO108h0 NO108h2 NO117f0 NO117f2 NO117h0
## 1 3.8699337 0.3124314 4.3292525 0.4802384 4.1866810 0.460191 3.203447
## 2 4.9583526 0.9372941 2.3564286 0.9604769 4.1866810 0.000000 3.203447
## 3 0.1209354 0.0000000 1.2604153 0.1600795 0.0000000 0.000000 0.000000
## 4 1.3907574 0.9372941 9.3709136 1.4407153 0.7850027 0.000000 2.038557
## 5 1.2698220 0.0000000 1.9180232 0.0000000 2.0933405 0.000000 1.456112
## 6 0.7860803 0.0000000 0.3836046 0.9604769 1.0466702 0.460191 1.747335
## NO117h2 NO121f0 NO121f2 NO121h0 NO121h2 NO125f0 NO125f2
## 1 0.0000000 1.5021482 0.59900377 2.3560599 0.000000 0.6270394 0.0000000
## 2 0.0000000 4.2413595 0.34228787 4.1156490 0.000000 0.8151513 0.0000000
## 3 2.0985182 1.9881373 3.16616279 2.8928837 5.004294 0.0000000 0.0000000
## 4 12.9726578 17.0096189 16.00195789 16.5818903 30.025764 1.2540789 1.9278469
## 5 0.0000000 2.4299456 0.08557197 2.5051777 1.154837 0.1254079 0.3855694
## 6 0.3815488 0.7952549 0.34228787 0.8052357 1.539783 2.3200459 0.3855694
## NO125h0 NO125h2 NO131f0 NO131f2 NO131h0 NO131h2 NO135f0
## 1 1.3769786 0.0000000 3.87836375 0.0000000 4.9061389 1.0583363 1.713434
## 2 0.1967112 0.0000000 5.50477436 0.0000000 2.9953269 0.7937522 3.426867
## 3 1.8687566 1.4180002 0.06255425 0.0000000 0.7230099 1.0583363 1.761029
## 4 14.1632082 3.5450006 1.56385635 0.3529386 9.1409113 8.2021062 12.612775
## 5 0.7868449 0.0000000 1.43874784 0.0000000 2.2723169 0.2645841 1.856220
## 6 0.8852005 0.3545001 0.81320530 1.7646930 0.4131485 0.0000000 1.237480
## NO135f2 NO135h0 NO135h2 NO146f0 NO146f2 NO146h0 NO146h2
## 1 0.0000000 1.9372761 0.2472569 3.0444569 0.9079499 1.6952729 0.4965870
## 2 0.0000000 3.4305931 0.7417708 0.3805571 0.7263599 0.3082314 0.7945392
## 3 0.0000000 2.4215951 3.7088540 0.0000000 0.5447699 0.4623471 0.0000000
## 4 1.2182694 15.4174888 18.7915268 0.5708357 1.8158997 3.6987771 2.4829350
## 5 0.9137021 1.8161963 0.2472569 0.3805571 0.4539749 0.3082314 0.0993174
## 6 0.0000000 0.8071984 0.4945139 0.3805571 0.5447699 0.3082314 0.5959044
## NO150f0 NO150f2 NO150h0 NO150h2 NO22f0 NO22f2 NO22h0
## 1 4.848906 0.0000000 4.3087060 0.0000000 4.5709747 0.0000000 2.8216199
## 2 4.434975 1.4782425 3.8577949 0.5019471 3.2911018 0.0000000 2.2925662
## 3 0.177399 0.0000000 1.3527333 0.0000000 0.0000000 0.0000000 0.0000000
## 4 1.892256 1.4782425 6.3127552 2.0077882 1.2798729 1.2984748 1.5871612
## 5 1.241793 0.7391212 1.9539480 0.5019471 0.6094633 0.0000000 0.8817562
## 6 1.005261 0.7391212 0.3507086 0.5019471 0.7923023 0.4328249 2.5570931
## NO22h2 NO27f0 NO27f2 NO27h0 NO27h2 NO32f0 NO32f2
## 1 0.0000000 3.6828320 0.7437243 2.9040732 0.2716667 3.552192 0.0000000
## 2 0.3080479 3.6828320 0.0000000 3.2670823 0.4075000 3.650864 0.4949708
## 3 0.6160959 0.1753730 0.3718622 0.0000000 0.4075000 0.000000 0.0000000
## 4 4.9287670 1.0522377 2.2311729 2.1780549 2.8525001 1.776096 1.7323980
## 5 0.3080479 1.0522377 0.3718622 1.0890274 0.4075000 2.269456 0.0000000
## 6 0.0000000 0.3507459 1.3015176 0.1815046 0.8150000 0.888048 0.7424563
## NO32h0 NO32h2 NO37f0 NO37f2 NO37h0 NO37h2 NO40f0
## 1 3.445509 0.7533388 1.3913043 0.70336486 0.8315976 0.6248568 0.4319113
## 2 2.819053 0.2511129 1.5652174 0.51987838 0.2771992 0.1041428 1.5116897
## 3 2.270904 2.2600164 0.0000000 0.06116216 2.7719920 4.1309976 0.0000000
## 4 11.354519 5.6500410 0.8695652 1.52905405 12.4739641 18.9887033 0.2159557
## 5 1.487834 0.5022259 0.1739130 0.88685135 0.0000000 0.4165712 0.2159557
## 6 1.017991 0.6277823 0.6956522 0.64220270 0.5543984 0.9719994 4.1031577
## NO40f2 NO40h0 NO40h2 NO47f0 NO47f2 NO47h0 NO47h2
## 1 0.2227340 1.5172918 0.6420715 3.87590296 0.6037583 2.6342518 0.8107016
## 2 0.8909359 1.2414206 1.0701191 5.04768758 0.0000000 4.2148030 1.0133770
## 3 0.0000000 2.4828412 3.8524289 0.09013728 0.0000000 0.3292815 1.8240786
## 4 0.2227340 19.7247936 26.3249305 0.99151006 0.0000000 2.1732578 13.1739008
## 5 0.4454679 0.8276137 0.8560953 1.26192189 0.3018791 0.7902756 1.6214032
## 6 0.2227340 1.5172918 0.2140238 1.08164734 0.0000000 0.3951378 0.4053508
## NO48f0 NO48f2 NO48h0 NO48h2 NO50f0 NO50f2 NO50h0
## 1 5.3129401 0.000000 4.318752 0.1964405 8.3852185 0.0000000 2.259935
## 2 3.3809619 0.678554 2.670807 1.3750835 4.3922573 0.0000000 3.884264
## 3 0.0000000 0.000000 2.329853 1.9644050 0.0000000 0.0000000 1.906820
## 4 0.4829946 0.339277 9.148934 9.0362629 0.9316909 0.3839242 18.715090
## 5 1.2419860 0.000000 1.193339 0.0000000 1.0647897 0.3839242 1.977443
## 6 1.5179829 2.374939 1.420642 1.5715240 0.9316909 1.1517725 1.906820
## NO50h2 NO51f0 NO51f2 NO51h0 NO51h2 NO56f0 NO56f2
## 1 0.0000000 0.7592828 0.6775729 2.0290429 0.4671365 1.7579734 0.8544474
## 2 0.2773026 2.3727587 0.1693932 1.8261386 0.3114243 3.5159468 0.2136119
## 3 1.6638155 0.0000000 0.0000000 0.4058086 1.0899851 0.0000000 0.0000000
## 4 15.2516424 0.8541931 0.3387864 0.9130693 3.4256675 0.6392631 0.2136119
## 5 0.0000000 1.1389242 0.3387864 1.2174257 0.0000000 0.4794473 0.0000000
## 6 2.2184207 1.6134759 0.5081797 1.7246865 0.3114243 0.6392631 0.8544474
## NO56h0 NO56h2 NO68f0 NO68f2 NO68h0 NO68h2 NO73f0
## 1 2.8046540 0.7082324 1.5635631 0.0000000 0.6244423 0.1909014 0.3046655
## 2 2.8804555 0.7082324 0.6700985 0.8940396 1.4570320 0.5727042 0.6093311
## 3 0.4548088 0.7082324 0.1116831 0.0000000 0.2081474 1.5272113 0.0000000
## 4 1.1370219 3.7182198 1.7869292 0.6705297 8.6381184 11.4540846 0.9139966
## 5 0.9854190 0.1770581 2.4570277 0.2235099 2.9140640 0.3818028 0.3046655
## 6 2.2740438 0.3541162 1.0051477 1.3410593 0.6244423 0.3818028 0.9139966
## NO73f2 NO73h0 NO73h2 NO77f0 NO77f2 NO77h0 NO77h2
## 1 0.3800955 0.5525031 0.2281805 1.6747464 0.1858686 2.008137 0.2179572
## 2 0.0000000 0.0000000 0.2281805 2.2708426 0.1858686 2.920926 0.4359145
## 3 0.0000000 1.9337610 0.0000000 1.0786502 0.0000000 2.669909 1.7436579
## 4 1.5203819 14.9175845 1.3690828 9.4239968 0.1858686 16.909424 3.4873158
## 5 0.0000000 0.8287547 0.0000000 1.3625056 0.0000000 1.665841 0.2179572
## 6 0.3800955 0.0000000 0.6845414 0.6812528 0.9293432 1.346364 0.0000000
## NO89f0 NO89f2 NO89h0 NO89h2
## 1 4.5265527 0.3802715 3.8222503 0.0000000
## 2 4.8985981 0.1901358 3.3635802 0.0000000
## 3 0.0000000 0.1901358 1.8346801 1.6303852
## 4 1.7362120 0.3168929 9.4282173 7.0650024
## 5 2.1702650 0.3802715 1.8856435 0.2717309
## 6 0.9301136 0.4436501 0.3567434 1.3586543
merged_out <- merged[, c("gene", colnames(merged)[!(colnames(merged) %in% c("TC", "gene"))])] # gene column to the first column and remove TC column
str(merged_out)
## 'data.frame': 29189 obs. of 139 variables:
## $ gene : chr NA NA NA NA ...
## $ NG1f0 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NG1h0 : num 0.488 0.163 3.903 18.867 0.163 ...
## $ NG2f0 : num 1.657 2.761 0.789 5.088 2.051 ...
## $ NG2h0 : num 1.21 2.07 1.31 11.52 1.11 ...
## $ NG3f0 : num 2.297 3.55 0 0.626 0.209 ...
## $ NG3h0 : num 2.89 2.76 1.13 6.28 1.63 ...
## $ NG4f0 : num 2.314 1.662 0 1.78 0.297 ...
## $ NG4h0 : num 2.91 1.89 1.07 7.49 2.04 ...
## $ NK13f0 : num 0 0 0 0.578 0 ...
## $ NK13h0 : num 0 0 5.3 22.28 0.73 ...
## $ NK14f0 : num 0 0 0 0 0 ...
## $ NK14h0 : num 0.544 0.126 3.514 18.115 0.293 ...
## $ NK15f0 : num 0.575 0.767 0.192 0.767 0.384 ...
## $ NK15h0 : num 1.181 0 0.844 4.387 0.506 ...
## $ NK22f0 : num 0 0 0 1.01 0 ...
## $ NK22h0 : num 0 0.222 1.994 27.471 0 ...
## $ NK25f0 : num 0.5708 0.2283 0.5137 0.4567 0.0571 ...
## $ NK25h0 : num 1.166 0.389 1.813 7.317 0.518 ...
## $ NK26f0 : num 0.62 0.62 0.31 0.31 0.155 ...
## $ NK26h0 : num 0.433 0.65 1.299 5.197 0.65 ...
## $ NK27f0 : num 0 0.356 0 1.424 0 ...
## $ NK27h0 : num 0.26 0 3.38 31.75 0 ...
## $ NK29f0 : num 3.76 2.96 1.42 9.2 2.34 ...
## $ NK29h0 : num 1.674 1.256 2.86 12.698 0.767 ...
## $ NK2f0 : num 0 0 1.11 0 0 ...
## $ NK2h0 : num 0.668 0.445 2.226 8.46 0.445 ...
## $ NK35f0 : num 1.315 0.658 0 0.658 0.986 ...
## $ NK35h0 : num 1.227 0.701 0.175 3.682 0 ...
## $ NK37f0 : num 1.455 0.728 0 2.547 0.364 ...
## $ NK37h0 : num 0 0 0 2.86 0 ...
## $ NK38f0 : num 0.456 0.608 0.152 0.304 0.304 ...
## $ NK38h0 : num 0.969 0.646 0.646 5.169 0.323 ...
## $ NK39f0 : num 0.218 0.109 0.765 0.109 0 ...
## $ NK39h0 : num 0.514 0.343 1.715 6.801 0.286 ...
## $ NK3f0 : num 4.15 4.15 0 4.15 0 ...
## $ NK3h0 : num 3.64 18.22 0 18.22 3.64 ...
## $ NK40f0 : num 5.742 1.744 1.308 2.835 0.291 ...
## $ NK40h0 : num 5.57 3.112 1.147 5.897 0.819 ...
## $ NK44f0 : num 0 0 0 0.911 0 ...
## $ NK44h0 : num 0 0 5.25 27.82 0 ...
## $ NK7f0 : num 0 0 0 0.713 0 ...
## $ NK7h0 : num 0 0 3.24 23.86 0 ...
## $ NK8f0 : num 0.751 0.5 0 0.5 0 ...
## $ NK8h0 : num 0 1.434 1.195 8.368 0.239 ...
## $ NK9f0 : num 0.7996 0 0.4305 0.4921 0.0615 ...
## $ NK9h0 : num 1.211 0.323 1.938 7.064 0.565 ...
## $ NO101f0: num 1.62 2.71 1.22 8.67 1.22 ...
## $ NO101f2: num 0.922 0.922 0 1.537 0 ...
## $ NO101h0: num 2.21 2.88 2.67 18.01 1.39 ...
## $ NO101h2: num 0 0 5.43 13.57 0 ...
## $ NO108f0: num 3.87 4.958 0.121 1.391 1.27 ...
## $ NO108f2: num 0.312 0.937 0 0.937 0 ...
## $ NO108h0: num 4.33 2.36 1.26 9.37 1.92 ...
## $ NO108h2: num 0.48 0.96 0.16 1.44 0 ...
## $ NO117f0: num 4.187 4.187 0 0.785 2.093 ...
## $ NO117f2: num 0.46 0 0 0 0 ...
## $ NO117h0: num 3.2 3.2 0 2.04 1.46 ...
## $ NO117h2: num 0 0 2.1 13 0 ...
## $ NO121f0: num 1.5 4.24 1.99 17.01 2.43 ...
## $ NO121f2: num 0.599 0.3423 3.1662 16.002 0.0856 ...
## $ NO121h0: num 2.36 4.12 2.89 16.58 2.51 ...
## $ NO121h2: num 0 0 5 30.03 1.15 ...
## $ NO125f0: num 0.627 0.815 0 1.254 0.125 ...
## $ NO125f2: num 0 0 0 1.928 0.386 ...
## $ NO125h0: num 1.377 0.197 1.869 14.163 0.787 ...
## $ NO125h2: num 0 0 1.42 3.55 0 ...
## $ NO131f0: num 3.8784 5.5048 0.0626 1.5639 1.4387 ...
## $ NO131f2: num 0 0 0 0.353 0 ...
## $ NO131h0: num 4.906 2.995 0.723 9.141 2.272 ...
## $ NO131h2: num 1.058 0.794 1.058 8.202 0.265 ...
## $ NO135f0: num 1.71 3.43 1.76 12.61 1.86 ...
## $ NO135f2: num 0 0 0 1.218 0.914 ...
## $ NO135h0: num 1.94 3.43 2.42 15.42 1.82 ...
## $ NO135h2: num 0.247 0.742 3.709 18.792 0.247 ...
## $ NO146f0: num 3.044 0.381 0 0.571 0.381 ...
## $ NO146f2: num 0.908 0.726 0.545 1.816 0.454 ...
## $ NO146h0: num 1.695 0.308 0.462 3.699 0.308 ...
## $ NO146h2: num 0.4966 0.7945 0 2.4829 0.0993 ...
## $ NO150f0: num 4.849 4.435 0.177 1.892 1.242 ...
## $ NO150f2: num 0 1.478 0 1.478 0.739 ...
## $ NO150h0: num 4.31 3.86 1.35 6.31 1.95 ...
## $ NO150h2: num 0 0.502 0 2.008 0.502 ...
## $ NO22f0 : num 4.571 3.291 0 1.28 0.609 ...
## $ NO22f2 : num 0 0 0 1.3 0 ...
## $ NO22h0 : num 2.822 2.293 0 1.587 0.882 ...
## $ NO22h2 : num 0 0.308 0.616 4.929 0.308 ...
## $ NO27f0 : num 3.683 3.683 0.175 1.052 1.052 ...
## $ NO27f2 : num 0.744 0 0.372 2.231 0.372 ...
## $ NO27h0 : num 2.9 3.27 0 2.18 1.09 ...
## $ NO27h2 : num 0.272 0.408 0.408 2.853 0.408 ...
## $ NO32f0 : num 3.55 3.65 0 1.78 2.27 ...
## $ NO32f2 : num 0 0.495 0 1.732 0 ...
## $ NO32h0 : num 3.45 2.82 2.27 11.35 1.49 ...
## $ NO32h2 : num 0.753 0.251 2.26 5.65 0.502 ...
## $ NO37f0 : num 1.391 1.565 0 0.87 0.174 ...
## $ NO37f2 : num 0.7034 0.5199 0.0612 1.5291 0.8869 ...
## $ NO37h0 : num 0.832 0.277 2.772 12.474 0 ...
## $ NO37h2 : num 0.625 0.104 4.131 18.989 0.417 ...
## [list output truncated]
head(merged_out)
## gene NG1f0 NG1h0 NG2f0 NG2h0 NG3f0 NG3h0 NG4f0
## 1 <NA> 0 0.4879297 1.656625 1.212721 2.2969698 2.889605 2.3143152
## 2 <NA> 0 0.1626432 2.761041 2.071732 3.5498624 2.763970 1.6615596
## 3 <NA> 0 3.9034374 0.788869 1.313781 0.0000000 1.130715 0.0000000
## 4 <NA> 0 18.8666142 5.088205 11.520850 0.6264463 6.281750 1.7802425
## 5 <NA> 0 0.1626432 2.051059 1.111661 0.2088154 1.633255 0.2967071
## 6 <NA> 0 0.3252865 1.735512 1.010601 1.4617081 0.502540 1.8395839
## NG4h0 NK13f0 NK13h0 NK14f0 NK14h0 NK15f0 NK15h0
## 1 2.905230 0.0000000 0.0000000 0.000000 0.5438818 0.5753598 1.1811344
## 2 1.885851 0.0000000 0.0000000 0.000000 0.1255112 0.7671463 0.0000000
## 3 1.070348 0.0000000 5.2961169 0.000000 3.5143129 0.1917866 0.8436674
## 4 7.492435 0.5775234 22.2802157 0.000000 18.1154464 0.7671463 4.3870706
## 5 2.038758 0.0000000 0.7304989 0.000000 0.2928594 0.3835732 0.5062004
## 6 1.325193 0.0000000 0.5478742 2.630139 0.5020447 0.0000000 0.6749339
## NK22f0 NK22h0 NK25f0 NK25h0 NK26f0 NK26h0 NK27f0
## 1 0.0000000 0.0000000 0.57082415 1.1655768 0.6198429 0.4331197 0.0000000
## 2 0.0000000 0.2215411 0.22832966 0.3885256 0.6198429 0.6496796 0.3559544
## 3 0.0000000 1.9938695 0.51374174 1.8131194 0.3099215 1.2993591 0.0000000
## 4 1.0079500 27.4710911 0.45665932 7.3172321 0.3099215 5.1974365 1.4238174
## 5 0.0000000 0.0000000 0.05708242 0.5180341 0.1549607 0.6496796 0.0000000
## 6 0.3359833 0.6646232 0.45665932 2.5901706 0.3099215 0.6496796 0.7119087
## NK27h0 NK29f0 NK29h0 NK2f0 NK2h0 NK35f0 NK35h0
## 1 0.2602594 3.7597823 1.6744068 0.0000000 0.6678806 1.3151710 1.2272255
## 2 0.0000000 2.9635931 1.2558051 0.0000000 0.4452537 0.6575855 0.7012717
## 3 3.3833717 1.4154475 2.8604450 1.1108651 2.2262686 0.0000000 0.1753179
## 4 31.7516418 9.2004084 12.6975852 0.0000000 8.4598206 0.6575855 3.6816766
## 5 0.0000000 2.3443348 0.7674365 0.0000000 0.4452537 0.9863783 0.0000000
## 6 0.0000000 0.7961892 0.3488348 0.3702884 1.7810149 0.4931891 0.8765897
## NK37f0 NK37h0 NK38f0 NK38h0 NK39f0 NK39h0 NK3f0 NK3h0
## 1 1.4552990 0.0000000 0.4563273 0.9691018 0.2184543 0.5143809 4.14776 3.643239
## 2 0.7276495 0.0000000 0.6084364 0.6460679 0.1092271 0.3429206 4.14776 18.216197
## 3 0.0000000 0.0000000 0.1521091 0.6460679 0.7645900 1.7146031 0.00000 0.000000
## 4 2.5467733 2.8631913 0.3042182 5.1685430 0.1092271 6.8012588 4.14776 18.216197
## 5 0.3638248 0.0000000 0.3042182 0.3230339 0.0000000 0.2857672 0.00000 3.643239
## 6 0.7276495 0.5368484 0.3042182 0.9691018 0.4369085 2.2861374 0.00000 3.643239
## NK40f0 NK40h0 NK44f0 NK44h0 NK7f0 NK7h0 NK8f0
## 1 5.7418925 5.5697247 0.0000000 0.0000000 0.0000000 0.000000 0.7505006
## 2 1.7443724 3.1124932 0.0000000 0.0000000 0.0000000 0.000000 0.5003337
## 3 1.3082793 1.1467080 0.0000000 5.2481620 0.0000000 3.235113 0.0000000
## 4 2.8346052 5.8973555 0.9107489 27.8152584 0.7128785 23.858956 0.5003337
## 5 0.2907287 0.8190772 0.0000000 0.0000000 0.0000000 0.000000 0.0000000
## 6 1.5990080 1.1467080 1.3661233 0.5248162 0.3564393 0.000000 0.5003337
## NK8h0 NK9f0 NK9h0 NO101f0 NO101f2 NO101h0 NO101h2
## 1 0.0000000 0.79958731 1.2110521 1.623007 0.9223195 2.207386 0.000000
## 2 1.4344870 0.00000000 0.3229472 2.713880 0.9223195 2.881219 0.000000
## 3 1.1954058 0.43054702 1.9376834 1.223907 0.0000000 2.672099 5.428387
## 4 8.3678407 0.49205373 7.0644706 8.673774 1.5371991 18.007621 13.570967
## 5 0.2390812 0.06150672 0.5651577 1.223907 0.0000000 1.394138 0.000000
## 6 1.1954058 0.43054702 2.3817358 1.037660 1.5371991 1.022368 0.000000
## NO108f0 NO108f2 NO108h0 NO108h2 NO117f0 NO117f2 NO117h0
## 1 3.8699337 0.3124314 4.3292525 0.4802384 4.1866810 0.460191 3.203447
## 2 4.9583526 0.9372941 2.3564286 0.9604769 4.1866810 0.000000 3.203447
## 3 0.1209354 0.0000000 1.2604153 0.1600795 0.0000000 0.000000 0.000000
## 4 1.3907574 0.9372941 9.3709136 1.4407153 0.7850027 0.000000 2.038557
## 5 1.2698220 0.0000000 1.9180232 0.0000000 2.0933405 0.000000 1.456112
## 6 0.7860803 0.0000000 0.3836046 0.9604769 1.0466702 0.460191 1.747335
## NO117h2 NO121f0 NO121f2 NO121h0 NO121h2 NO125f0 NO125f2
## 1 0.0000000 1.5021482 0.59900377 2.3560599 0.000000 0.6270394 0.0000000
## 2 0.0000000 4.2413595 0.34228787 4.1156490 0.000000 0.8151513 0.0000000
## 3 2.0985182 1.9881373 3.16616279 2.8928837 5.004294 0.0000000 0.0000000
## 4 12.9726578 17.0096189 16.00195789 16.5818903 30.025764 1.2540789 1.9278469
## 5 0.0000000 2.4299456 0.08557197 2.5051777 1.154837 0.1254079 0.3855694
## 6 0.3815488 0.7952549 0.34228787 0.8052357 1.539783 2.3200459 0.3855694
## NO125h0 NO125h2 NO131f0 NO131f2 NO131h0 NO131h2 NO135f0
## 1 1.3769786 0.0000000 3.87836375 0.0000000 4.9061389 1.0583363 1.713434
## 2 0.1967112 0.0000000 5.50477436 0.0000000 2.9953269 0.7937522 3.426867
## 3 1.8687566 1.4180002 0.06255425 0.0000000 0.7230099 1.0583363 1.761029
## 4 14.1632082 3.5450006 1.56385635 0.3529386 9.1409113 8.2021062 12.612775
## 5 0.7868449 0.0000000 1.43874784 0.0000000 2.2723169 0.2645841 1.856220
## 6 0.8852005 0.3545001 0.81320530 1.7646930 0.4131485 0.0000000 1.237480
## NO135f2 NO135h0 NO135h2 NO146f0 NO146f2 NO146h0 NO146h2
## 1 0.0000000 1.9372761 0.2472569 3.0444569 0.9079499 1.6952729 0.4965870
## 2 0.0000000 3.4305931 0.7417708 0.3805571 0.7263599 0.3082314 0.7945392
## 3 0.0000000 2.4215951 3.7088540 0.0000000 0.5447699 0.4623471 0.0000000
## 4 1.2182694 15.4174888 18.7915268 0.5708357 1.8158997 3.6987771 2.4829350
## 5 0.9137021 1.8161963 0.2472569 0.3805571 0.4539749 0.3082314 0.0993174
## 6 0.0000000 0.8071984 0.4945139 0.3805571 0.5447699 0.3082314 0.5959044
## NO150f0 NO150f2 NO150h0 NO150h2 NO22f0 NO22f2 NO22h0
## 1 4.848906 0.0000000 4.3087060 0.0000000 4.5709747 0.0000000 2.8216199
## 2 4.434975 1.4782425 3.8577949 0.5019471 3.2911018 0.0000000 2.2925662
## 3 0.177399 0.0000000 1.3527333 0.0000000 0.0000000 0.0000000 0.0000000
## 4 1.892256 1.4782425 6.3127552 2.0077882 1.2798729 1.2984748 1.5871612
## 5 1.241793 0.7391212 1.9539480 0.5019471 0.6094633 0.0000000 0.8817562
## 6 1.005261 0.7391212 0.3507086 0.5019471 0.7923023 0.4328249 2.5570931
## NO22h2 NO27f0 NO27f2 NO27h0 NO27h2 NO32f0 NO32f2
## 1 0.0000000 3.6828320 0.7437243 2.9040732 0.2716667 3.552192 0.0000000
## 2 0.3080479 3.6828320 0.0000000 3.2670823 0.4075000 3.650864 0.4949708
## 3 0.6160959 0.1753730 0.3718622 0.0000000 0.4075000 0.000000 0.0000000
## 4 4.9287670 1.0522377 2.2311729 2.1780549 2.8525001 1.776096 1.7323980
## 5 0.3080479 1.0522377 0.3718622 1.0890274 0.4075000 2.269456 0.0000000
## 6 0.0000000 0.3507459 1.3015176 0.1815046 0.8150000 0.888048 0.7424563
## NO32h0 NO32h2 NO37f0 NO37f2 NO37h0 NO37h2 NO40f0
## 1 3.445509 0.7533388 1.3913043 0.70336486 0.8315976 0.6248568 0.4319113
## 2 2.819053 0.2511129 1.5652174 0.51987838 0.2771992 0.1041428 1.5116897
## 3 2.270904 2.2600164 0.0000000 0.06116216 2.7719920 4.1309976 0.0000000
## 4 11.354519 5.6500410 0.8695652 1.52905405 12.4739641 18.9887033 0.2159557
## 5 1.487834 0.5022259 0.1739130 0.88685135 0.0000000 0.4165712 0.2159557
## 6 1.017991 0.6277823 0.6956522 0.64220270 0.5543984 0.9719994 4.1031577
## NO40f2 NO40h0 NO40h2 NO47f0 NO47f2 NO47h0 NO47h2
## 1 0.2227340 1.5172918 0.6420715 3.87590296 0.6037583 2.6342518 0.8107016
## 2 0.8909359 1.2414206 1.0701191 5.04768758 0.0000000 4.2148030 1.0133770
## 3 0.0000000 2.4828412 3.8524289 0.09013728 0.0000000 0.3292815 1.8240786
## 4 0.2227340 19.7247936 26.3249305 0.99151006 0.0000000 2.1732578 13.1739008
## 5 0.4454679 0.8276137 0.8560953 1.26192189 0.3018791 0.7902756 1.6214032
## 6 0.2227340 1.5172918 0.2140238 1.08164734 0.0000000 0.3951378 0.4053508
## NO48f0 NO48f2 NO48h0 NO48h2 NO50f0 NO50f2 NO50h0
## 1 5.3129401 0.000000 4.318752 0.1964405 8.3852185 0.0000000 2.259935
## 2 3.3809619 0.678554 2.670807 1.3750835 4.3922573 0.0000000 3.884264
## 3 0.0000000 0.000000 2.329853 1.9644050 0.0000000 0.0000000 1.906820
## 4 0.4829946 0.339277 9.148934 9.0362629 0.9316909 0.3839242 18.715090
## 5 1.2419860 0.000000 1.193339 0.0000000 1.0647897 0.3839242 1.977443
## 6 1.5179829 2.374939 1.420642 1.5715240 0.9316909 1.1517725 1.906820
## NO50h2 NO51f0 NO51f2 NO51h0 NO51h2 NO56f0 NO56f2
## 1 0.0000000 0.7592828 0.6775729 2.0290429 0.4671365 1.7579734 0.8544474
## 2 0.2773026 2.3727587 0.1693932 1.8261386 0.3114243 3.5159468 0.2136119
## 3 1.6638155 0.0000000 0.0000000 0.4058086 1.0899851 0.0000000 0.0000000
## 4 15.2516424 0.8541931 0.3387864 0.9130693 3.4256675 0.6392631 0.2136119
## 5 0.0000000 1.1389242 0.3387864 1.2174257 0.0000000 0.4794473 0.0000000
## 6 2.2184207 1.6134759 0.5081797 1.7246865 0.3114243 0.6392631 0.8544474
## NO56h0 NO56h2 NO68f0 NO68f2 NO68h0 NO68h2 NO73f0
## 1 2.8046540 0.7082324 1.5635631 0.0000000 0.6244423 0.1909014 0.3046655
## 2 2.8804555 0.7082324 0.6700985 0.8940396 1.4570320 0.5727042 0.6093311
## 3 0.4548088 0.7082324 0.1116831 0.0000000 0.2081474 1.5272113 0.0000000
## 4 1.1370219 3.7182198 1.7869292 0.6705297 8.6381184 11.4540846 0.9139966
## 5 0.9854190 0.1770581 2.4570277 0.2235099 2.9140640 0.3818028 0.3046655
## 6 2.2740438 0.3541162 1.0051477 1.3410593 0.6244423 0.3818028 0.9139966
## NO73f2 NO73h0 NO73h2 NO77f0 NO77f2 NO77h0 NO77h2
## 1 0.3800955 0.5525031 0.2281805 1.6747464 0.1858686 2.008137 0.2179572
## 2 0.0000000 0.0000000 0.2281805 2.2708426 0.1858686 2.920926 0.4359145
## 3 0.0000000 1.9337610 0.0000000 1.0786502 0.0000000 2.669909 1.7436579
## 4 1.5203819 14.9175845 1.3690828 9.4239968 0.1858686 16.909424 3.4873158
## 5 0.0000000 0.8287547 0.0000000 1.3625056 0.0000000 1.665841 0.2179572
## 6 0.3800955 0.0000000 0.6845414 0.6812528 0.9293432 1.346364 0.0000000
## NO89f0 NO89f2 NO89h0 NO89h2
## 1 4.5265527 0.3802715 3.8222503 0.0000000
## 2 4.8985981 0.1901358 3.3635802 0.0000000
## 3 0.0000000 0.1901358 1.8346801 1.6303852
## 4 1.7362120 0.3168929 9.4282173 7.0650024
## 5 2.1702650 0.3802715 1.8856435 0.2717309
## 6 0.9301136 0.4436501 0.3567434 1.3586543
write.table(merged_out, file = "ExpressionTable_with_gene.tsv", sep = "\t", row.names = FALSE, quote = FALSE)
# 1. データの読み込み
exp <- read.table("ExpressionTable.txt", header = FALSE, sep = "\t", stringsAsFactors = FALSE)
annot <- read.table("Annotation.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# 2. 位置情報 -> 遺伝子名のベースマッピングを作成
position_to_gene <- setNames(annot$gene, annot$TC)
# 3. ExpressionTableの1列目(位置情報)を基に、gene名をマッピング
original_positions <- exp$V1
mapped_genes <- ifelse(original_positions %in% names(position_to_gene),
position_to_gene[original_positions],
original_positions)
# 4. 同じgene名が複数回出現した場合に .1, .2, ... を付けてユニークにする
make_unique <- function(x) {
ave(x, x, FUN = function(y) {
if (length(y) == 1) return(y)
paste0(y[1], ".", seq_along(y))
})
}
unique_genes <- make_unique(mapped_genes)
# 5. ExpressionTableの1列目を置換
exp$V1 <- unique_genes
# 6. 対応表を作成(元の位置情報と新しいgene名の対応)
mapping_table <- data.frame(Position = original_positions, GeneName = unique_genes, stringsAsFactors = FALSE)
# 7. 出力
write.table(exp, file = "ExpressionTable_annotated.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(mapping_table, file = "Position_to_Gene_mapping.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
# 1. アノテーション済み発現データを読み込む
exp_annot <- read.table("ExpressionTable_annotated.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# 2. 最初の列(遺伝子名)と残りの数値部分に分ける
gene_names_full <- exp_annot[, 1]
expression_values <- exp_annot[, -1]
expression_values <- apply(expression_values, 2, as.numeric) # 数値に変換
# 3. 接尾辞(.1, .2 など)を取り除いて、ベースの遺伝子名だけを使う
gene_names_base <- sub("\\.\\d+$", "", gene_names_full)
# 4. 同じベース名ごとに合計
summarized <- aggregate(expression_values, by = list(Gene = gene_names_base), FUN = sum)
# 5. 結果を保存
write.table(summarized, file = "ExpressionTable_annotated_gene.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
Differential expression analysis using edgeR
library(edgeR)
library(dplyr)
# データ読み込み
counts <- read.table("ExpressionTable_annotated_gene.txt", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
cohort <- read.table("Cohort.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# 一致するサンプルのみ抽出
common_samples <- intersect(colnames(counts), cohort$Cond)
counts <- counts[, common_samples]
cohort <- cohort[cohort$Cond %in% common_samples, ]
# group(NO.f, NO.h, OB.f, OB.h, POB.f, POB.h など)を因子として定義
group <- factor(cohort$newcond3)
# デザインマトリクスの作成
design <- model.matrix(~ 0 + group) # インターセプトなしで各群を列に
colnames(design) <- levels(group) # 列名を分かりやすく
dge <- DGEList(counts = counts, group = group)
keep <- filterByExpr(dge, design)
dge <- dge[keep, , keep.lib.sizes = FALSE]
dge <- calcNormFactors(dge)
dge <- estimateDisp(dge, design)
fit <- glmQLFit(dge, design)
# 比較したいペア(h vs f)
contrast_NO <- makeContrasts(NO.h - NO.f, levels = design)
contrast_OB <- makeContrasts(OB.h - OB.f, levels = design)
contrast_POB <- makeContrasts(POB.h - POB.f, levels = design)
qlf_NO <- glmQLFTest(fit, contrast = contrast_NO)
qlf_OB <- glmQLFTest(fit, contrast = contrast_OB)
qlf_POB <- glmQLFTest(fit, contrast = contrast_POB)
# 結果取得
res_NO <- topTags(qlf_NO, n = Inf)$table
res_OB <- topTags(qlf_OB, n = Inf)$table
res_POB <- topTags(qlf_POB, n = Inf)$table
# 保存
write.csv(res_NO, "edgeR_NO_h_vs_f_gene.csv")
write.csv(res_OB, "edgeR_OB_h_vs_f_gene.csv")
write.csv(res_POB, "edgeR_POB_h_vs_f_gene.csv")
# plot_volcano function that i defined earlier
plot_volcano(res_NO, title = "NO.h vs NO.f")
plot_volcano(res_OB, title = "OB.h vs OB.f")
plot_volcano(res_POB, title = "POB.h vs POB.f")
さらに遺伝子名をvolcano plotに入れる。
library(ggplot2)
library(ggrepel)
plot_volcano2 <- function(res,
title = "Volcano Plot",
logFC_threshold = 1,
FDR_threshold = 0.05,
label_logFC = 2,
label_neglogFDR = 10) {
res$gene <- rownames(res)
res$Significant <- with(res, ifelse(FDR < FDR_threshold & abs(logFC) >= logFC_threshold,
ifelse(logFC > 0, "Up", "Down"),
"Not Significant"))
res$label <- with(res, ifelse(abs(logFC) > label_logFC & -log10(FDR) > label_neglogFDR, gene, NA))
ggplot(res, aes(x = logFC, y = -log10(FDR), color = Significant)) +
geom_point(alpha = 0.7) +
scale_color_manual(values = c("Up" = "red", "Down" = "blue", "Not Significant" = "grey")) +
theme_minimal() +
labs(title = title,
x = "log2 Fold Change",
y = "-log10 FDR") +
geom_vline(xintercept = c(-logFC_threshold, logFC_threshold), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(FDR_threshold), linetype = "dashed", color = "black") +
ggrepel::geom_text_repel(aes(label = label), max.overlaps = 20, segment.size = 0.2, min.segment.length = 0, size = 3)
}
plot_volcano2(res_NO, label_logFC = 1, label_neglogFDR = 10)
## Warning: Removed 14608 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
plot_volcano2(res_OB, label_logFC = 1.5, label_neglogFDR = 5)
## Warning: Removed 14612 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
plot_volcano2(res_POB, label_logFC = 2, label_neglogFDR = 10)
## Warning: Removed 14609 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
さらにfold changeで散布図を作る。
library(tibble)
# fold change だけ取り出し(行名を保持)
fc_POB <- res_POB %>% rownames_to_column("Gene") %>% select(Gene, logFC)
fc_NO <- res_NO %>% rownames_to_column("Gene") %>% select(Gene, logFC)
# フォールドチェンジのテーブルをGeneでマージ
fc_df_POBNO <- full_join(
fc_NO %>% rename(FC_NO = logFC),
fc_POB %>% rename(FC_POB = logFC),
by = "Gene"
)
# 距離と象限を計算してから、上位5つずつ抽出 & ラベル
fc_df_POBNO_labeled <- fc_df_POBNO %>%
mutate(
quadrant = case_when(
FC_NO < 0 & FC_POB > 0 ~ "Q2",
FC_NO > 0 & FC_POB < 0 ~ "Q4",
TRUE ~ "other"
),
distance = sqrt(FC_NO^2 + FC_POB^2)
)
top_q2 <- fc_df_POBNO_labeled %>% filter(quadrant == "Q2") %>% slice_max(distance, n = 5)
top_q4 <- fc_df_POBNO_labeled %>% filter(quadrant == "Q4") %>% slice_max(distance, n = 5)
# 全体の散布図
ggplot(fc_df_POBNO, aes(x = FC_NO, y = FC_POB)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "blue") +
theme_minimal() +
labs(
title = "Fold Change: POB vs NO (h vs f)",
x = "log2(FC) NO.f vs NO.h",
y = "log2(FC) POB.f vs POB.h"
)
# ラベル付き散布図
ggplot(fc_df_POBNO_labeled, aes(x = FC_NO, y = FC_POB, color = quadrant)) +
geom_point(alpha = 0.6) +
geom_text_repel(data = top_q2, aes(label = Gene), size = 3, max.overlaps = Inf, color = "black") +
geom_text_repel(data = top_q4, aes(label = Gene), size = 3, max.overlaps = Inf, color = "black") +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "blue") +
scale_color_manual(values = c(Q2 = "orange", Q4 = "orange", other = "gray")) +
theme_minimal()
venn diagram of DEGs
# 全てのDEG
deg_NO <- rownames(res_NO[res_NO$FDR < 0.05, ])
deg_OB <- rownames(res_OB[res_OB$FDR < 0.05, ])
deg_POB <- rownames(res_POB[res_POB$FDR < 0.05, ])
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
# リスト化
venn_list <- list(
NO = deg_NO,
OB = deg_OB,
POB = deg_POB
)
# ベン図をファイルに出力せずにプロット
venn.plot <- venn.diagram(
x = venn_list,
filename = NULL,
col = "black",
fill = c("dodgerblue", "orange", "forestgreen"),
alpha = 0.5,
cex = 1.2,
cat.cex = 1.2,
cat.pos = 0
)
grid.newpage()
grid.draw(venn.plot)
comparison of POB.f and NO.f, and POB.h and NO.h
# 比較したいペア(h vs f)
contrast_POBNOf <- makeContrasts(POB.f - NO.f, levels = design)
contrast_POBNOh <- makeContrasts(POB.h - NO.h, levels = design)
qlf_POBNOf <- glmQLFTest(fit, contrast = contrast_POBNOf)
qlf_POBNOh <- glmQLFTest(fit, contrast = contrast_POBNOh)
# 結果取得
res_POBNOf <- topTags(qlf_POBNOf, n = Inf)$table
res_POBNOh <- topTags(qlf_POBNOh, n = Inf)$table
# 保存
write.csv(res_POBNOf, "edgeR_POB_f_vs_NO_f_gene.csv")
write.csv(res_POBNOh, "edgeR_POB_h_vs_NO_h_gene.csv")
# plot_volcano function that i defined earlier
plot_volcano2(res_POBNOf, label_logFC = 5, label_neglogFDR = 3, title = "POB.f vs NO.f")
## Warning: Removed 14613 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
plot_volcano2(res_POBNOh, label_logFC = 5, label_neglogFDR = 3, title = "POB.h vs NO.h")
## Warning: Removed 14612 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
# get fold change
fc_f <- res_POBNOf %>% select(logFC)
fc_h <- res_POBNOh %>% select(logFC)
# 遺伝子名を共通キーにして結合
fc_df <- full_join(
tibble(Gene = rownames(fc_f), FC_f = fc_f$logFC),
tibble(Gene = rownames(fc_h), FC_h = fc_h$logFC),
by = "Gene"
)
# 散布図を描画
ggplot(fc_df, aes(x = FC_f, y = FC_h)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "blue") +
theme_minimal() +
labs(
title = "Fold Change: h vs f (POB vs NO)",
x = "log2(FC) POB.f vs NO.f",
y = "log2(FC) POB.h vs NO.h"
)
library(tibble)
# fold change だけ取り出し(行名を保持)
fc_f <- res_POBNOf %>% rownames_to_column("Gene") %>% select(Gene, logFC)
fc_h <- res_POBNOh %>% rownames_to_column("Gene") %>% select(Gene, logFC)
# フォールドチェンジのテーブルをGeneでマージ
fc_df <- full_join(
fc_f %>% rename(FC_f = logFC),
fc_h %>% rename(FC_h = logFC),
by = "Gene"
)
# 距離と象限を計算してから、上位5つずつ抽出 & ラベル
fc_df_labeled <- fc_df %>%
mutate(
quadrant = case_when(
FC_f < 0 & FC_h > 0 ~ "Q2",
FC_f > 0 & FC_h < 0 ~ "Q4",
TRUE ~ "other"
),
distance = sqrt(FC_f^2 + FC_h^2)
)
top_q2 <- fc_df_labeled %>% filter(quadrant == "Q2") %>% slice_max(distance, n = 5)
top_q4 <- fc_df_labeled %>% filter(quadrant == "Q4") %>% slice_max(distance, n = 5)
# ラベル付き散布図
ggplot(fc_df_labeled, aes(x = FC_f, y = FC_h, color = quadrant)) +
geom_point(alpha = 0.6) +
geom_text_repel(data = top_q2, aes(label = Gene), size = 3, max.overlaps = Inf, color = "black") +
geom_text_repel(data = top_q4, aes(label = Gene), size = 3, max.overlaps = Inf, color = "black") +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "blue") +
scale_color_manual(values = c(Q2 = "orange", Q4 = "orange", other = "gray")) +
coord_cartesian(xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5)) +
theme_minimal()
Box plot of representative genes
library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)
# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)
# 遺伝子指定
target_gene <- "PPP1R3B"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])
# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))
# 条件情報を加工
plot_df <- plot_df %>%
mutate(
Group = sub("\\..*", "", newcond3),
Time = sub(".*\\.", "", newcond3),
Group = factor(Group, levels = c("NO", "OB", "POB")),
Time = factor(Time, levels = c("f", "h")),
FillColor = case_when(
Group == "NO" ~ "dodgerblue",
Group == "OB" ~ "orange",
Group == "POB" ~ "forestgreen"
),
AlphaVal = ifelse(Time == "f", 0.3, 0.8),
Cond = newcond3
)
# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))
# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
if (length(vals1) >= 2 && length(vals2) >= 2) {
t.test(vals1, vals2)$p.value
} else {
NA
}
})
# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
if (is.na(p)) return(NA)
else if (p < 0.001) return("***")
else if (p < 0.01) return("**")
else if (p < 0.05) return("*")
else return(NA)
})
# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]
# プロット
ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
geom_jitter(width = 0.2, size = 2) +
scale_fill_identity() +
scale_alpha_identity() +
geom_signif(
comparisons = sig_comparisons,
annotations = sig_stars,
tip_length = 0.01,
textsize = 5,
color = "black" # 横棒とアスタリスクを黒で描画
) +
theme_minimal() +
labs(
title = paste0(target_gene, " expression (CPM) across conditions"),
x = "Condition", y = "CPM (Counts per Million)"
)
library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)
# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)
# 遺伝子指定
target_gene <- "SLC2A4"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])
# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))
# 条件情報を加工
plot_df <- plot_df %>%
mutate(
Group = sub("\\..*", "", newcond3),
Time = sub(".*\\.", "", newcond3),
Group = factor(Group, levels = c("NO", "OB", "POB")),
Time = factor(Time, levels = c("f", "h")),
FillColor = case_when(
Group == "NO" ~ "dodgerblue",
Group == "OB" ~ "orange",
Group == "POB" ~ "forestgreen"
),
AlphaVal = ifelse(Time == "f", 0.3, 0.8),
Cond = newcond3
)
# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))
# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
if (length(vals1) >= 2 && length(vals2) >= 2) {
t.test(vals1, vals2)$p.value
} else {
NA
}
})
# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
if (is.na(p)) return(NA)
else if (p < 0.001) return("***")
else if (p < 0.01) return("**")
else if (p < 0.05) return("*")
else return(NA)
})
# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]
# プロット
ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
geom_jitter(width = 0.2, size = 2) +
scale_fill_identity() +
scale_alpha_identity() +
geom_signif(
comparisons = sig_comparisons,
annotations = sig_stars,
tip_length = 0.01,
textsize = 5,
color = "black" # 横棒とアスタリスクを黒で描画
) +
theme_minimal() +
labs(
title = paste0(target_gene, " expression (CPM) across conditions"),
x = "Condition", y = "CPM (Counts per Million)"
)
library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)
# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)
# 遺伝子指定
target_gene <- "PPARGC1A"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])
# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))
# 条件情報を加工
plot_df <- plot_df %>%
mutate(
Group = sub("\\..*", "", newcond3),
Time = sub(".*\\.", "", newcond3),
Group = factor(Group, levels = c("NO", "OB", "POB")),
Time = factor(Time, levels = c("f", "h")),
FillColor = case_when(
Group == "NO" ~ "dodgerblue",
Group == "OB" ~ "orange",
Group == "POB" ~ "forestgreen"
),
AlphaVal = ifelse(Time == "f", 0.3, 0.8),
Cond = newcond3
)
# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))
# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
if (length(vals1) >= 2 && length(vals2) >= 2) {
t.test(vals1, vals2)$p.value
} else {
NA
}
})
# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
if (is.na(p)) return(NA)
else if (p < 0.001) return("***")
else if (p < 0.01) return("**")
else if (p < 0.05) return("*")
else return(NA)
})
# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]
# プロット
ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
geom_jitter(width = 0.2, size = 2) +
scale_fill_identity() +
scale_alpha_identity() +
geom_signif(
comparisons = sig_comparisons,
annotations = sig_stars,
tip_length = 0.01,
textsize = 5,
color = "black" # 横棒とアスタリスクを黒で描画
) +
theme_minimal() +
labs(
title = paste0(target_gene, " expression (CPM) across conditions"),
x = "Condition", y = "CPM (Counts per Million)"
)
library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)
# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)
# 遺伝子指定
target_gene <- "PPARG"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])
# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))
# 条件情報を加工
plot_df <- plot_df %>%
mutate(
Group = sub("\\..*", "", newcond3),
Time = sub(".*\\.", "", newcond3),
Group = factor(Group, levels = c("NO", "OB", "POB")),
Time = factor(Time, levels = c("f", "h")),
FillColor = case_when(
Group == "NO" ~ "dodgerblue",
Group == "OB" ~ "orange",
Group == "POB" ~ "forestgreen"
),
AlphaVal = ifelse(Time == "f", 0.3, 0.8),
Cond = newcond3
)
# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))
# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
if (length(vals1) >= 2 && length(vals2) >= 2) {
t.test(vals1, vals2)$p.value
} else {
NA
}
})
# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
if (is.na(p)) return(NA)
else if (p < 0.001) return("***")
else if (p < 0.01) return("**")
else if (p < 0.05) return("*")
else return(NA)
})
# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]
# プロット
ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
geom_jitter(width = 0.2, size = 2) +
scale_fill_identity() +
scale_alpha_identity() +
geom_signif(
comparisons = sig_comparisons,
annotations = sig_stars,
tip_length = 0.01,
textsize = 5,
color = "black" # 横棒とアスタリスクを黒で描画
) +
theme_minimal() +
labs(
title = paste0(target_gene, " expression (CPM) across conditions"),
x = "Condition", y = "CPM (Counts per Million)"
)
library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)
# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)
# 遺伝子指定
target_gene <- "ADIPOQ"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])
# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))
# 条件情報を加工
plot_df <- plot_df %>%
mutate(
Group = sub("\\..*", "", newcond3),
Time = sub(".*\\.", "", newcond3),
Group = factor(Group, levels = c("NO", "OB", "POB")),
Time = factor(Time, levels = c("f", "h")),
FillColor = case_when(
Group == "NO" ~ "dodgerblue",
Group == "OB" ~ "orange",
Group == "POB" ~ "forestgreen"
),
AlphaVal = ifelse(Time == "f", 0.3, 0.8),
Cond = newcond3
)
# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))
# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
if (length(vals1) >= 2 && length(vals2) >= 2) {
t.test(vals1, vals2)$p.value
} else {
NA
}
})
# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
if (is.na(p)) return(NA)
else if (p < 0.001) return("***")
else if (p < 0.01) return("**")
else if (p < 0.05) return("*")
else return(NA)
})
# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]
# プロット
p <- ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
geom_jitter(width = 0.2, size = 2) +
scale_fill_identity() +
scale_alpha_identity() +
theme_minimal() +
labs(
title = paste0(target_gene, " expression (CPM) across conditions"),
x = "Condition", y = "CPM (Counts per Million)"
)
# アスタリスクがあれば追加する
if (length(sig_stars) > 0) {
p <- p + geom_signif(
comparisons = sig_comparisons,
annotations = sig_stars,
tip_length = 0.01,
textsize = 5,
color = "black"
)
}
# 表示
print(p)
changed the code because without significant difference the former code
put out an error and cannnot plot.
library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)
# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)
# 遺伝子指定
target_gene <- "IL6"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])
# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))
# 条件情報を加工
plot_df <- plot_df %>%
mutate(
Group = sub("\\..*", "", newcond3),
Time = sub(".*\\.", "", newcond3),
Group = factor(Group, levels = c("NO", "OB", "POB")),
Time = factor(Time, levels = c("f", "h")),
FillColor = case_when(
Group == "NO" ~ "dodgerblue",
Group == "OB" ~ "orange",
Group == "POB" ~ "forestgreen"
),
AlphaVal = ifelse(Time == "f", 0.3, 0.8),
Cond = newcond3
)
# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))
# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
if (length(vals1) >= 2 && length(vals2) >= 2) {
t.test(vals1, vals2)$p.value
} else {
NA
}
})
# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
if (is.na(p)) return(NA)
else if (p < 0.001) return("***")
else if (p < 0.01) return("**")
else if (p < 0.05) return("*")
else return(NA)
})
# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]
# プロット
p <- ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
geom_jitter(width = 0.2, size = 2) +
scale_fill_identity() +
scale_alpha_identity() +
theme_minimal() +
labs(
title = paste0(target_gene, " expression (CPM) across conditions"),
x = "Condition", y = "CPM (Counts per Million)"
)
# アスタリスクがあれば追加する
if (length(sig_stars) > 0) {
p <- p + geom_signif(
comparisons = sig_comparisons,
annotations = sig_stars,
tip_length = 0.01,
textsize = 5,
color = "black"
)
}
# 表示
print(p)
TSS
library(ineq)
library(entropy)
##
## Attaching package: 'entropy'
## The following object is masked from 'package:ineq':
##
## entropy
# 1. 読み込み
exp_raw <- read.table("ExpressionTable_annotated.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
# 2. NAを除外して処理
exp_clean <- exp_raw[!is.na(exp_raw[, 1]), ]
rownames(exp_clean) <- make.unique(exp_clean[, 1])
exp_matrix <- apply(exp_clean[, -1], 2, as.numeric)
# 3. ベース遺伝子名を取得(.1, .2 を削除)
base_gene <- sub("\\.\\d+$", "", rownames(exp_clean))
# 4. 各ベース遺伝子ごとにTSSの分布から多様性指標を計算
calc_tss_diversity <- function(expr_mat, base_names) {
gene_list <- unique(base_names)
results <- data.frame(Gene = gene_list, NumTSS = NA, Entropy = NA, Gini = NA)
for (i in seq_along(gene_list)) {
g <- gene_list[i]
idx <- which(base_names == g)
if (length(idx) < 2) next # 単一TSSは除外 or Entropy=0とする
# 各TSSの発現量を合計(Across samples)
tss_expr <- rowSums(expr_mat[idx, , drop = FALSE], na.rm = TRUE)
# 割合(確率分布)に変換
tss_frac <- tss_expr / sum(tss_expr)
# Shannon entropy
ent <- entropy::entropy(tss_frac, unit = "log2") # 情報量
# Gini係数(偏り)
gini <- ineq::Gini(tss_frac)
results[i, "NumTSS"] <- length(idx)
results[i, "Entropy"] <- ent
results[i, "Gini"] <- gini
}
return(na.omit(results))
}
# 実行
diversity_df <- calc_tss_diversity(exp_matrix, base_gene)
# 確認
head(diversity_df)
## Gene NumTSS Entropy Gini
## 1 MTND1P23 4 0.4130105 0.7007458
## 2 MTATP6P1 55 3.8355813 0.7821932
## 4 RP11-206L10 2 0.7723556 0.2732201
## 7 AGRN 2 0.3626989 0.4308619
## 8 TTLL10 2 0.8149603 0.2476636
## 12 RP4-758J18 2 0.8452708 0.2273294
library(ggplot2)
# 1. NumTSS > 40 の遺伝子だけ抽出
many_tss <- diversity_df[diversity_df$NumTSS > 40, ]
# 2. プロット本体
ggplot(diversity_df, aes(x = NumTSS, y = Entropy)) +
geom_jitter(width = 0.2, alpha = 0.6) +
geom_smooth(method = "lm") +
geom_text_repel(data = many_tss, aes(label = Gene), size = 3, color = "red") +
theme_minimal() +
labs(
title = "TSS number and Shannon entropy",
x = "TSS number",
y = "Shannon Entropy"
)
## `geom_smooth()` using formula = 'y ~ x'
# 例2: Gini vs Entropy(相関チェック)
ggplot(diversity_df, aes(x = Gini, y = Entropy)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm") +
theme_classic() +
labs(title = "TSS usage bias (Gini) and Entropy", x = "Gini coefficient", y = "Shannon Entropy")
## `geom_smooth()` using formula = 'y ~ x'
# logFCをres_NOから取得(行名=Gene名)
logfc_vec <- res_NO$logFC
names(logfc_vec) <- rownames(res_NO)
# Shannon/Gini用に使うベース遺伝子名と対応
diversity_df$logFC <- logfc_vec[as.character(diversity_df$Gene)]
# フィルター(必要ならNA除去)
plot_df <- na.omit(diversity_df)
# scatter plot
# 発現量(exp_matrix)からベース遺伝子ごとに合計
total_expr <- tapply(rowSums(exp_matrix, na.rm = TRUE), base_gene, sum)
diversity_df$TotalExpression <- total_expr[diversity_df$Gene]
# 散布図(log scale)
library(ggplot2)
ggplot(diversity_df, aes(x = NumTSS, y = log10(TotalExpression + 1))) +
geom_jitter(width = 0.2, alpha = 0.6) +
geom_smooth(method = "lm") +
theme_minimal() +
labs(title = "TSS number and expression (NO)", x = "TSS number", y = "log10(expression + 1)")
## `geom_smooth()` using formula = 'y ~ x'
shannon entropy
ggplot(plot_df, aes(x = Entropy, y = logFC)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm") +
theme_classic() +
labs(title = "Shannon Entropy and fold change (NO)", x = "Shannon Entropy", y = "log2 Fold Change")
## `geom_smooth()` using formula = 'y ~ x'
nothing interesting.
ggplot(plot_df, aes(x = as.factor(NumTSS), y = logFC)) +
geom_boxplot(outlier.alpha = 0.3) +
theme_bw() +
labs(title = "fold change (NO) by TSS number", x = "TSS number", y = "log2 Fold Change")
nothing interesting.
Gini coefficient
plot_df$GiniGroup <- cut(plot_df$Gini, breaks = quantile(plot_df$Gini, probs = c(0, 0.33, 0.67, 1)),
labels = c("Low", "Medium", "High"), include.lowest = TRUE)
ggplot(plot_df, aes(x = GiniGroup, y = logFC)) +
geom_boxplot(outlier.alpha = 0.3) +
theme_bw() +
labs(title = "Gini coefficient and fold change (NO)", x = "Gini Group", y = "log2 Fold Change")
no difference.